annotate openms_exporter.py @ 3:8fa16707e162 draft

Update.
author galaxyp
date Fri, 10 May 2013 17:31:59 -0400
parents cf0d72c7b482
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
1 from xml.sax import make_parser, ContentHandler
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
2 from optparse import OptionParser
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
3
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
4
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
def main():
    """Run the exporter from the command line.

    Parses the command-line options, opens the output file for writing,
    instantiates the content handler selected by ``--type`` with that file,
    and feeds the input XML document through a SAX parser using the handler.
    """
    (options, _) = _parse_args()
    with open(options.output, "w") as out:
        parser = make_parser()
        handler = _get_handler(options)(out)
        parser.setContentHandler(handler)
        # Give the input its own ``with`` block so the handle is closed
        # deterministically, including when parsing raises, instead of
        # relying on the parser or the garbage collector to do it.
        with open(options.input, "r") as source:
            parser.parse(source)
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
12
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
13
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
def _get_handler(options):
    """Return the handler class registered for ``options.type``.

    The lookup is a plain index into the module-level ``handlers`` mapping,
    so a type without an entry raises ``KeyError``.
    """
    handler_class = handlers[options.type]
    return handler_class
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
16
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
17
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
class OpenMsContentHandler(ContentHandler):
    """SAX handler base class that remembers selected XML attributes.

    Subclasses are expected to provide:

    * ``record_values`` -- a mapping from element name to the list of
      attribute names to capture whenever that element is opened;
    * ``output`` -- an object with a ``write`` method that receives rows;
    * ``_handleElement`` -- a hook invoked each time an element is closed.

    Captured attributes are stored on the instance under the XML attribute's
    own name, and are reset to ``None`` when the element that supplied them
    is closed.
    """

    # Default so that a handler without its own mapping records nothing
    # instead of raising AttributeError on the first element.
    record_values = {}

    def __record_values(self, keys, attrs):
        """Copy each attribute named in ``keys`` from ``attrs`` onto ``self``.

        Attributes absent from ``attrs`` are stored as ``None``.
        """
        for key in keys:
            setattr(self, key, attrs.get(key, None))

    def _get_values(self, keys):
        """Return the recorded value for each key, in order.

        A key that was never recorded, or whose recorded value is ``None``
        (attribute absent, or its element already closed), yields ``""`` so
        the result can always be joined into a text row.
        """
        values = []
        for key in keys:
            value = getattr(self, key, "")
            values.append("" if value is None else value)
        return values

    def _set_attributes(self, name, attrs):
        """Record the attributes configured for element ``name``, if any."""
        # Direct lookup replaces the former ``iteritems()`` scan, which only
        # exists on Python 2; ``dict.get`` behaves the same on 2 and 3.
        keys = self.record_values.get(name)
        if keys is not None:
            self.__record_values(keys, attrs)

    def _write_line(self, line):
        """Write ``line`` followed by a newline to ``self.output``."""
        self.output.write(line)
        self.output.write("\n")

    def startElement(self, name, attrs):
        """SAX callback: capture the configured attributes of ``name``."""
        self._set_attributes(name, attrs)

    def _handleElement(self, name):
        """Hook called when element ``name`` closes; does nothing here."""
        pass

    def endElement(self, name):
        """SAX callback: run the subclass hook, then reset recorded values."""
        # The hook runs first so it still sees this element's attributes.
        self._handleElement(name)
        # An empty mapping makes every configured attribute fall back to None.
        self._set_attributes(name, {})

    def _write_row(self, col_keys):
        """Write the values for ``col_keys`` as one tab-separated line."""
        row_values = self._get_values(col_keys)
        row = "\t".join(row_values)
        self._write_line(row)
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
50
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
51
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
class FeatureHullHandler(OpenMsContentHandler):
    """Handler that writes one row for every closed ``pt`` element.

    Each row holds, tab-separated, the most recently recorded ``id`` of a
    ``feature`` element, ``nr`` of a ``convexhull`` element, and the ``x``
    and ``y`` attributes of the ``pt`` element itself.
    """

    # Element name -> attribute names captured when that element opens.
    record_values = {
        "feature": ["id"],
        "convexhull": ["nr"],
        "pt": ["x", "y"],
    }

    def __init__(self, output):
        """Store ``output``, the writable object that receives the rows."""
        # Run the inherited initialiser so base-class state is set up; the
        # explicit call form works on both Python 2 and Python 3.
        OpenMsContentHandler.__init__(self)
        self.output = output

    def _handleElement(self, name):
        """Emit a row when a ``pt`` element closes; ignore other elements."""
        if name == "pt":
            self._write_point()

    def _write_point(self):
        """Write the current id, nr, x and y values as one row."""
        col_keys = ["id", "nr", "x", "y"]
        self._write_row(col_keys)
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
69
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
70
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
class PeptideHandler(OpenMsContentHandler):
    """Handler that writes one row for every closed ``PeptideHit`` element.

    Each row holds, tab-separated: score, sequence, score_type, charge, MZ
    and RT, taken from the most recently recorded attributes of the
    ``PeptideHit`` and ``PeptideIdentification`` elements.
    """

    # Element name -> attribute names captured when that element opens.
    # ``search_engine`` and ``significance_threshold`` are recorded but are
    # not among the columns written by ``_write_peptide``.
    record_values = {
        "IdentificationRun": ["search_engine"],
        "PeptideIdentification": ["score_type", "significance_threshold", "MZ", "RT"],
        "PeptideHit": ["score", "sequence", "charge"],
    }

    def __init__(self, output):
        """Store ``output``, the writable object that receives the rows."""
        # Run the inherited initialiser so base-class state is set up; the
        # explicit call form works on both Python 2 and Python 3.
        OpenMsContentHandler.__init__(self)
        self.output = output

    def _handleElement(self, name):
        """Emit a row when a ``PeptideHit`` closes; ignore other elements."""
        if name == "PeptideHit":
            self._write_peptide()

    def _write_peptide(self):
        """Write the current peptide hit values as one row."""
        col_keys = ["score", "sequence", "score_type", "charge", "MZ", "RT"]
        self._write_row(col_keys)
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
88
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
89
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
# Registry consulted by _get_handler: maps each value accepted by the
# ``--type`` option to the handler class that performs that export.
handlers = dict(
    peptide=PeptideHandler,
    feature_hull=FeatureHullHandler,
)
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
94
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
95
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
96 def _parse_args():
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
97 parser = OptionParser()
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
98 parser.add_option("--input", dest="input")
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
99 parser.add_option("--output", dest="output")
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
100 parser.add_option("--type", dest="type", choices=["peptide", "feature_hull"])
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
101 return parser.parse_args()
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
102
cf0d72c7b482 Update.
galaxyp
parents:
diff changeset
# Run the exporter only when this file is executed as a script; importing
# it as a module leaves the definitions available without side effects.
if __name__ == "__main__":
    main()