view manipulate/rename_kcf/rename_kcf.py @ 1:0a5e0df17054 draft default tip

Uploaded
author chrisb
date Fri, 06 May 2016 08:05:48 -0400
parents 89592faa2875
children
line wrap: on
line source

__author__ = "Chris Barnett"
__version__ = "0.3"
__license__ = "MIT"

class id_generator(object):
    """
    Hands out consecutive integers, one per call, starting at counterinit.

    Thin wrapper around itertools.count. The historical .next() method is kept for
    existing callers; __iter__/__next__ are provided as well so an instance also works
    with the builtin next() and in for-loops on both Python 2 and Python 3.
    """

    def __init__(self, counterinit=0):
        """
        :param counterinit: first number that will be returned. 0 by default.
        """
        import itertools

        self.generator = itertools.count(counterinit)

    def __iter__(self):
        return self

    def next(self):
        """
        :return: the next integer in the sequence
        """
        # . builtin next() works on python 2.6+ and 3; the .next() method of the
        # . underlying iterator only exists on python 2
        return next(self.generator)

    __next__ = next


def read_meta_kcf(inputstream, prefix="GLY", counterinit=0):
    """
    Read a kcf file (which may contain multiple kcf entries) and rename every ENTRY.

    Often the ENTRY is too long or is linearcode, and kcf files then are not recognised
    properly and/or are ignored in MCAW and other analysis tools. Each ENTRY line is
    replaced by "ENTRY      <prefix><n>    Glycan", where n comes from a counter.
    Duplicates are not checked for.

    Lines before the first ENTRY are discarded. Lines that follow a "///" terminator but
    precede the next ENTRY stay attached to the entry that was just closed. An entry that
    is missing its "///" terminator (in the middle or at the end of the file) is still
    returned; no terminator is added for it.

    :param inputstream: the kcf file (any iterable of lines)
    :param prefix: the prefix for the entry. GLY by default. keep it short
    :param counterinit: entries are numbered starting at counterinit. 0 by default.
    :return: a list with one item per kcf entry, each item being the list of lines of
        that entry. empty list if no ENTRY line was found.
    :raises IOError: if inputstream is None, an empty list or an empty string
    """
    import itertools

    if inputstream is None or inputstream == [] or inputstream == "":
        raise IOError("empty input stream")

    counter = itertools.count(counterinit)
    list_of_kcf_paragraphs = []
    kcfpara = None  # . lines of the entry currently being collected
    stored = False  # . True once kcfpara has been added to list_of_kcf_paragraphs

    for line in inputstream:
        if "ENTRY" in line:
            # . previous entry never saw its ///, keep it instead of dropping it
            if kcfpara is not None and not stored:
                list_of_kcf_paragraphs.append(kcfpara)
            # . could strip and split the line and remake it, but easier to supplant it
            newline = "ENTRY      " + str(prefix) + str(next(counter)) + "    Glycan\n"
            kcfpara = [newline]
            stored = False
        elif kcfpara is None:
            # . nothing to attach to yet (this also covers a stray /// before any ENTRY)
            continue
        else:
            kcfpara.append(line)
            # . store each entry once only, even if /// shows up more than once
            if "///" in line and not stored:
                list_of_kcf_paragraphs.append(kcfpara)
                stored = True

    # . sometimes kcf has no /// or final kcf in many has no ///, so store it here
    if kcfpara is not None and not stored:
        list_of_kcf_paragraphs.append(kcfpara)

    return list_of_kcf_paragraphs  # why this list. easier to deal with each glycan as an individual item in the list


def flatten_meta_kcf_list(metakcflist):
    """
    Join the lines of every kcf entry back into one piece of text.

    :param metakcflist:  a list containing lists of strings (one inner list per kcf entry).
        None items are skipped, so a parse result that holds no entries flattens to an
        empty string instead of raising TypeError.
    :return: combined kcfs as a large string for saving to file
    """
    import itertools

    paragraphs = (para for para in metakcflist if para is not None)
    # . from_iterable walks the paragraphs lazily, no need to unpack or build a list first
    return "".join(itertools.chain.from_iterable(paragraphs))


if __name__ == "__main__":
    # . command line use: rename every ENTRY of the input kcf file and save the result
    from optparse import OptionParser

    usage = "usage: python %prog [options]\n"
    parser = OptionParser(usage=usage)
    parser.add_option("-i", action="store", type="string", dest="i", default="input",
                      help="input kcf file (input)")
    parser.add_option("-o", action="store", type="string", dest="o", default="output",
                      help="output kcf file (output)")
    parser.add_option("-p", action="store", type="string", dest="p", default="GLY",
                      help="prefix for glycan entry name change")
    parser.add_option("-c", action="store", type="int", dest="c", default=0,
                      help="starting number for counter for glycan entry")
    (options, args) = parser.parse_args()

    # . every option has a default, so the filenames are always present
    inputname = options.i
    outputname = options.o

    # . open() works on python 2 and 3 (file() is python 2 only) and the with block
    # . closes the input even if parsing fails
    with open(inputname, "r") as instream:
        convertedkcf = read_meta_kcf(instream, prefix=options.p, counterinit=options.c)

    # . output is only opened once the input has been read and converted, so a failed
    # . read never leaves a truncated output file behind
    with open(outputname, "w") as f:
        f.write(flatten_meta_kcf_list(convertedkcf))