Mercurial > repos > galaxyp > openms
diff idxml_exporter.py @ 0:ba86fd127f5a draft
Uploaded
author | galaxyp |
---|---|
date | Wed, 19 Dec 2012 00:32:25 -0500 |
parents | |
children |
line wrap: on
line diff
"""Export peptide hits from an idXML-style XML file as tab-separated text.

The input is streamed through a SAX parser; one output row is written for
every ``PeptideHit`` element encountered.
"""
from xml.sax import make_parser, ContentHandler
from optparse import OptionParser


def main():
    """Parse the command line and convert the input XML to a TSV file."""
    (options, _) = _parse_args()
    # Open the input in binary mode so the XML parser can honour the
    # encoding declared in the document, and make sure both files are
    # closed even if parsing fails.
    with open(options.input, "rb") as source:
        with open(options.output, "w") as out:
            parser = make_parser()
            handler = _get_handler(options, out)
            parser.setContentHandler(handler)
            parser.parse(source)


def _get_handler(option, out):
    """Return the SAX content handler that writes rows to *out*.

    *option* is accepted for future export types; only peptide export is
    implemented, so it is currently unused.
    """
    return PeptideHandler(out)


class PeptideHandler(ContentHandler):
    """SAX handler writing one tab-separated row per ``PeptideHit`` element.

    Attribute values of the elements listed in ``record_values`` are stored
    on the handler instance while the element is open and cleared when it
    closes, so a hit row sees the values of its enclosing elements.
    """

    # Element name -> XML attributes remembered while that element is open.
    record_values = {
        "IdentificationRun": ["search_engine"],
        "PeptideIdentification": ["score_type", "significance_threshold", "MZ", "RT"],
        "PeptideHit": ["score", "sequence", "charge"],
    }

    # Output column order; every name must be an attribute recorded above.
    column_keys = ["score", "sequence", "score_type", "charge", "MZ", "RT"]

    def __init__(self, output):
        """Create a handler writing rows to the file-like object *output*."""
        # Explicit base call (not super()): ContentHandler is an old-style
        # class on Python 2.
        ContentHandler.__init__(self)
        self.output = output

    def _store_values(self, keys, attrs):
        """Copy each of *keys* from *attrs* onto self (None when absent)."""
        for key in keys:
            setattr(self, key, attrs.get(key, None))

    def startElement(self, name, attrs):
        """Remember the tracked attributes of the element being opened."""
        self._set_attributes(name, attrs)

    def endElement(self, name):
        """Emit a row when a ``PeptideHit`` closes, then clear its values."""
        if name == "PeptideHit":
            self._write_peptide()
        # reset values for element
        self._set_attributes(name, {})

    def _write_peptide(self):
        """Write the currently recorded values as one tab-separated row."""
        row_values = self._get_values(self.column_keys)
        row = "\t".join(row_values)
        self._write_line(row)

    def _write_line(self, line):
        """Write *line* followed by a newline to the output."""
        self.output.write(line)
        self.output.write("\n")

    def _get_values(self, keys):
        """Return the recorded value for each key, "" when unset or None."""
        values = []
        for key in keys:
            value = getattr(self, key, None)
            # Missing XML attributes are stored as None; render them as an
            # empty cell so that str.join does not raise TypeError.
            values.append("" if value is None else value)
        return values

    def _set_attributes(self, name, attrs):
        """Record the tracked attributes of element *name*, if it is tracked."""
        tracked = self.record_values.get(name)
        if tracked is not None:
            self._store_values(tracked, attrs)


def _parse_args():
    """Parse command-line options and return ``(options, args)``.

    Exits with a usage error when ``--input`` or ``--output`` is missing.
    """
    parser = OptionParser()
    parser.add_option("--input", dest="input", help="path of the XML file to read")
    parser.add_option("--output", dest="output", help="path of the tab-separated file to write")
    parser.add_option("--type", dest="type", choices=["peptide"], help="kind of records to export")
    (options, args) = parser.parse_args()
    if not options.input or not options.output:
        parser.error("both --input and --output are required")
    return (options, args)


if __name__ == "__main__":
    main()