annotate idxml_exporter.py @ 0:ba86fd127f5a draft

Uploaded
author galaxyp
date Wed, 19 Dec 2012 00:32:25 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
1 from xml.sax import make_parser, ContentHandler
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
2 from optparse import OptionParser
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
3
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
4
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
def main():
    """Parse the ``--input`` XML file and write its peptide rows to ``--output``.

    Command-line options come from ``_parse_args``; the SAX content handler
    that produces the output lines comes from ``_get_handler``.
    """
    (options, _) = _parse_args()
    parser = make_parser()
    # The input is opened first so that a missing or unreadable input file
    # does not leave behind a freshly truncated output file.  Binary mode
    # hands raw bytes to the XML parser, which then applies the encoding
    # declared by the document instead of the platform default.
    with open(options.input, "rb") as source:
        with open(options.output, "w") as out:
            handler = _get_handler(options, out)
            parser.setContentHandler(handler)
            parser.parse(source)
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
12
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
13
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
def _get_handler(option, out):
    """Build the SAX content handler that writes its rows to *out*.

    *option* is accepted for the caller's convenience but is not consulted:
    a ``PeptideHandler`` is returned unconditionally.
    """
    peptide_handler = PeptideHandler(out)
    return peptide_handler
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
16
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
17
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
class PeptideHandler(ContentHandler):
    """SAX handler that writes one tab-separated line per ``PeptideHit`` element.

    Attribute values of the elements listed in ``record_values`` are stored
    as attributes of the handler when the element opens and are cleared again
    when it closes.  When a ``PeptideHit`` closes, the values currently held
    (its own and those of still-open recorded elements) are written as one row.
    """

    # Element name -> names of the XML attributes captured from that element.
    record_values = {
        "IdentificationRun": ["search_engine"],
        "PeptideIdentification": ["score_type", "significance_threshold", "MZ", "RT"],
        "PeptideHit": ["score", "sequence", "charge"],
    }

    def __init__(self, output):
        """Create a handler that writes its rows to *output*.

        *output* only needs a ``write`` method accepting text.
        """
        # Explicit base-class call (rather than super()) so this also works
        # where ContentHandler is an old-style class.
        ContentHandler.__init__(self)
        self.output = output

    def __record_values(self, keys, attrs):
        """Store ``attrs[key]`` on the handler for every key; absent keys become None."""
        for key in keys:
            setattr(self, key, attrs.get(key, None))

    def startElement(self, name, attrs):
        """Capture the recorded attributes of an opening element."""
        self._set_attributes(name, attrs)

    def endElement(self, name):
        """Write a row when a ``PeptideHit`` closes, then clear the element's values."""
        if name == "PeptideHit":
            self._write_peptide()
        # Reset values for the element so they cannot leak into later rows.
        self._set_attributes(name, {})

    def _write_peptide(self):
        """Write the current hit as one tab-separated line."""
        # "sequence" is the attribute recorded from PeptideHit; the previous
        # key "peptide" was never set, so that column was always empty.
        col_keys = ["score", "sequence", "score_type", "charge", "MZ", "RT"]
        row_values = self._get_values(col_keys)
        row = "\t".join(row_values)
        self._write_line(row)

    def _write_line(self, line):
        """Write *line* followed by a newline to the output."""
        self.output.write(line)
        self.output.write("\n")

    def _get_values(self, keys):
        """Return the stored value for each key, using "" where none is available.

        A value is unavailable when the element was never seen, when the
        attribute was absent from the element, or after the element was reset;
        the latter two are stored as None, which ``str.join`` cannot handle.
        """
        stored = (getattr(self, key, None) for key in keys)
        return ["" if value is None else value for value in stored]

    def _set_attributes(self, name, attrs):
        """Record the attributes of element *name* if it is one we track."""
        keys = self.record_values.get(name)
        if keys is not None:
            self.__record_values(keys, attrs)
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
58
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
59
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
60 def _parse_args():
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
61 parser = OptionParser()
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
62 parser.add_option("--input", dest="input")
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
63 parser.add_option("--output", dest="output")
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
64 parser.add_option("--type", dest="type", choices=["peptide"])
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
65 return parser.parse_args()
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
66
ba86fd127f5a Uploaded
galaxyp
parents:
diff changeset
# Run the exporter only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()