# Source code for gocats.tools

# !/usr/bin/python3
"""
Functions for handling some file input and output and reformatting tasks in GOcats.
"""
import json
import jsonpickle
import sys
import os
import re
import csv
# Raise the csv module's field size limit as far as it will go: start at
# sys.maxsize and shrink the candidate by a factor of 10 every time
# csv.field_size_limit rejects it with OverflowError.
maxInt = sys.maxsize

while True:
    try:
        csv.field_size_limit(maxInt)
    except OverflowError:
        # Candidate was rejected; try a value ten times smaller.
        maxInt = int(maxInt/10)
    else:
        # Limit accepted; maxInt now holds the value in effect.
        break


# TODO: move to using JSON not JsonPickle and use sort_keys=True parameter to test outputs between runs
def _json_ready(value):
    """Return a JSON-serializable form of a str, list, or set; raise for anything else."""
    if isinstance(value, (str, list)):
        return value
    if isinstance(value, (set, frozenset)):
        # Sets have no stable iteration order, so sort them to keep the output
        # reproducible between runs; fall back to plain iteration order when
        # the items cannot be compared with each other.
        try:
            return sorted(value)
        except TypeError:
            return list(value)
    raise Exception("Data type is not supported!")


def json_save(obj, filename):
    """Converts a Python object into a JSON serializable object (if it is not already) and saves it to a file.

    Strings and lists are written unchanged. Sets are written as lists (sorted when their items are
    comparable). A dictionary may hold string, list, or set values; its keys are sorted in the output.

    :param obj: A :py:obj:`str`, :py:obj:`list`, :py:obj:`set`, or a :py:obj:`dict` whose values are of those types.
    :param str filename: Output path without extension; ``.json`` is appended to it.
    :raises Exception: If `obj`, or any value of a dictionary `obj`, has an unsupported type.
    """
    if isinstance(obj, dict):
        json_obj = {key: _json_ready(value) for key, value in obj.items()}
    else:
        json_obj = _json_ready(obj)

    # Serialize before opening the file so a serialization error does not
    # leave an empty output file behind.
    json_text = json.dumps(json_obj, sort_keys=True)
    with open(filename + ".json", 'w') as json_file:
        json_file.write(json_text)
def jsonpickle_save(obj, filename):
    """Takes a Python object, converts it into a JsonPickle string, and writes it out to a file.

    :param obj: A Python :py:obj:`obj`
    :param str filename: Output path without extension; ``.json_pickle`` is appended to it.
    """
    # keys=True asks jsonpickle to encode dictionary keys itself rather than
    # coercing them to plain strings.
    json_obj = jsonpickle.encode(obj, keys=True)
    # Encode first and write inside a context manager so the handle is always
    # closed and a failed encode does not leave an empty file behind.
    with open(filename + ".json_pickle", 'w') as pickle_file:
        pickle_file.write(json_obj)
def jsonpickle_load(filename):
    """Takes a JsonPickle file and loads in the JsonPickle object into a Python object.

    :param str filename: A path to a JsonPickle file.
    :return: The Python object decoded from the file.
    """
    # Read inside a context manager so the file handle is closed after use.
    with open(filename) as pickle_file:
        json_str = pickle_file.read()
    # NOTE(security): jsonpickle.decode can instantiate arbitrary Python
    # objects; only load files that come from a trusted source.
    # keys=True mirrors the keys=True used when encoding so that dictionary
    # keys are restored rather than left as strings.
    return jsonpickle.decode(json_str, keys=True)
def list_to_file(filename, data):
    """Writes each item of an iterable to its own line of a text file.

    Every item is converted with :py:func:`str` and followed by a newline.

    :param str filename: Output path without extension; ``.txt`` is appended to it.
    :param data: A Python :py:obj:`list`.
    """
    target_path = filename + ".txt"
    with open(target_path, 'wt') as handle:
        handle.writelines(str(entry) + '\n' for entry in data)
# Functions for handling Gene Annotation Files
def write_out_gaf(data, filename):
    """Writes out an object representing a Gene Annotation File (GAF) to a file.

    Rows are written tab-delimited using the csv module's default dialect settings.

    :param list data: A :py:obj:`list` object representing a GAF. Each item in the list represents a row.
    :param str filename: A path and name for the GAF.
    """
    # newline='' lets the csv writer control line endings itself; without it,
    # platforms that translate '\n' on write emit an extra '\r' on every row.
    with open(filename, 'w', newline='') as gaf_file:
        gafwriter = csv.writer(gaf_file, delimiter='\t')
        gafwriter.writerows(data)
def parse_gaf(filename):
    """Converts a Gene Annotation File (GAF) into a :py:obj:`list` object where every item is a row from the GAF.

    Rows whose first field starts with ``!`` (comment lines) and blank lines are skipped.

    :param str filename: Specify the location of the GAF.
    :return: A list representing the GAF; each row is a list of its tab-separated fields.
    :rtype: :py:obj:`list`
    """
    gaf_array = []
    # newline='' hands raw line endings to the csv reader, as the csv module
    # expects for files it parses.
    with open(os.path.realpath(filename), newline='') as gaf_file:
        for row in csv.reader(gaf_file, delimiter='\t'):
            # csv.reader yields an empty list for a blank line; indexing it
            # would raise IndexError, so skip such rows along with comments.
            if not row or row[0].startswith('!'):
                continue
            gaf_array.append(row)
    return gaf_array