annotate manipulate/rename_kcf/rename_kcf.py @ 0:89592faa2875 draft

Uploaded
author chrisb
date Wed, 23 Mar 2016 14:35:56 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
89592faa2875 Uploaded
chrisb
parents:
diff changeset
1 __author__ = "Chris Barnett"
89592faa2875 Uploaded
chrisb
parents:
diff changeset
2 __version__ = "0.3"
89592faa2875 Uploaded
chrisb
parents:
diff changeset
3 __license__ = "MIT"
89592faa2875 Uploaded
chrisb
parents:
diff changeset
4
89592faa2875 Uploaded
chrisb
parents:
diff changeset
class id_generator():
    """Hand out consecutive integers, one per call to :meth:`next`.

    Thin wrapper around ``itertools.count``; the first value returned is
    ``counterinit`` and every following call returns the previous value + 1.
    """

    def __init__(self, counterinit=0):
        """
        :param counterinit: first number the generator will return. 0 by default.
        """
        import itertools

        self.generator = itertools.count(counterinit)

    def next(self):
        """
        :return: the next integer of the sequence
        """
        # . the builtin next() works on python 2.6+ and python 3, whereas the
        # . iterator's own .next() method only exists on python 2
        return next(self.generator)

    # . python 3 spelling of the iterator protocol, so next(obj) also works
    __next__ = next

    def __iter__(self):
        return self
89592faa2875 Uploaded
chrisb
parents:
diff changeset
13
89592faa2875 Uploaded
chrisb
parents:
diff changeset
14
89592faa2875 Uploaded
chrisb
parents:
diff changeset
def read_meta_kcf(inputstream, prefix="GLY", counterinit=0):
    """
    Read a kcf file (which may contain multiple kcf entries) and rename every ENTRY.

    Often the ENTRY name is too long, or is linearcode, and the kcf file is then
    not recognised properly and/or is ignored by MCAW and other analysis tools.
    Each line containing "ENTRY" is replaced by "ENTRY <prefix><n> Glycan", where
    n comes from a counter starting at counterinit. Duplicates are not checked for.

    Lines seen before the first ENTRY are dropped. Lines between a "///" terminator
    and the next ENTRY stay attached to the entry that precedes them. An entry that
    is never closed by "///" (in the middle or at the end of the file) is still
    returned; no terminator is added to it.

    :param inputstream: the kcf file (any iterable of lines)
    :param prefix: the prefix for the entry. GLY by default. keep it short
    :param counterinit: entries are numbered starting at counterinit. 0 by default.
    :return: a list with one item per glycan, each item being the list of lines of
             that entry. empty list when the input holds no ENTRY line
    :raises IOError: when inputstream is None, an empty list or an empty string
    """
    import itertools

    if inputstream is None or inputstream == [] or inputstream == "":
        raise IOError("empty input stream")

    # . builtin next() on itertools.count behaves the same on python 2 and 3
    counter = itertools.count(counterinit)
    list_of_kcf_paragraphs = []
    kcfpara = None  # . lines of the entry currently being read
    stored = False  # . True once kcfpara has been put in list_of_kcf_paragraphs

    for line in inputstream:
        if "ENTRY" in line:
            # . previous entry had no ///, keep it instead of overwriting it
            if kcfpara is not None and not stored:
                list_of_kcf_paragraphs.append(kcfpara)
            # . could strip and split the line and remake it, but easier to supplant it
            kcfpara = ["ENTRY " + str(prefix) + str(next(counter)) + " Glycan\n"]
            stored = False
        elif kcfpara is None:
            # . nothing to attach this line to yet (also covers a stray /// up front)
            continue
        else:
            kcfpara.append(line)
            # . store each entry once only, even if /// shows up twice in a row
            if "///" in line and not stored:
                list_of_kcf_paragraphs.append(kcfpara)
                stored = True

    # . sometimes the final kcf has no ///, so make sure it is not lost
    if kcfpara is not None and not stored:
        list_of_kcf_paragraphs.append(kcfpara)

    return list_of_kcf_paragraphs  # why this list. easier to deal with each glycan as an individual item in the list
89592faa2875 Uploaded
chrisb
parents:
diff changeset
47
89592faa2875 Uploaded
chrisb
parents:
diff changeset
48
89592faa2875 Uploaded
chrisb
parents:
diff changeset
def flatten_meta_kcf_list(metakcflist):
    """
    Join every line of every kcf entry, in order, into one string.

    :param metakcflist: a list containing lists of strings. items that are None
                        or empty are skipped
    :return: combined kcfs as a large string for saving to file
    """
    import itertools

    # . skip None/empty items so a placeholder in the list does not raise TypeError
    paragraphs = (para for para in metakcflist if para)
    # . join consumes the chained lines directly, no intermediate list needed
    return "".join(itertools.chain.from_iterable(paragraphs))
89592faa2875 Uploaded
chrisb
parents:
diff changeset
58
89592faa2875 Uploaded
chrisb
parents:
diff changeset
59
89592faa2875 Uploaded
chrisb
parents:
diff changeset
# Command line entry point: read a kcf file, rename its entries, write the result.
if __name__ == "__main__":
    from optparse import OptionParser

    usage = "usage: python %prog [options]\n"
    parser = OptionParser(usage=usage)
    parser.add_option("-i", action="store", type="string", dest="i", default="input",
                      help="input kcf file (input)")
    parser.add_option("-o", action="store", type="string", dest="o", default="output",
                      help="output kcf file (output)")
    parser.add_option("-p", action="store", type="string", dest="p", default="GLY",
                      help="prefix for glycan entry name change")
    parser.add_option("-c", action="store", type="int", dest="c", default=0,
                      help="starting number for counter for glycan entry")
    (options, args) = parser.parse_args()

    # . every option has a default, so these attributes always exist
    inputname = options.i
    outputname = options.o

    # . open() instead of the python 2 only file() builtin; the with block makes
    # . sure the input handle is closed even when reading fails
    with open(inputname, "r") as instream:
        convertedkcf = read_meta_kcf(instream, prefix=options.p, counterinit=options.c)

    # . output is only created once the input has been read successfully
    with open(outputname, "w") as f:
        f.write(flatten_meta_kcf_list(convertedkcf))