Mercurial > repos > galaxyp > openms
diff openms_exporter.py @ 2:cf0d72c7b482 draft
Update.
author | galaxyp |
---|---|
date | Fri, 10 May 2013 17:31:05 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/openms_exporter.py Fri May 10 17:31:05 2013 -0400 @@ -0,0 +1,104 @@ +from xml.sax import make_parser, ContentHandler +from optparse import OptionParser + + +def main(): + (options, _) = _parse_args() + with open(options.output, "w") as out: + parser = make_parser() + handler = _get_handler(options)(out) + parser.setContentHandler(handler) + parser.parse(open(options.input, "r")) + + +def _get_handler(options): + return handlers[options.type] + + +class OpenMsContentHandler(ContentHandler): + + def __record_values(self, keys, attrs): + for key in keys: + setattr(self, key, attrs.get(key, None)) + + def _get_values(self, keys): + return [getattr(self, key, "") for key in keys] + + def _set_attributes(self, name, attrs): + for element_name, element_attributes in self.record_values.iteritems(): + if name == element_name: + self.__record_values(element_attributes, attrs) + + def _write_line(self, line): + self.output.write(line) + self.output.write("\n") + + def startElement(self, name, attrs): + self._set_attributes(name, attrs) + + def _handleElement(self, name): + pass + + def endElement(self, name): + self._handleElement(name) + self._set_attributes(name, {}) + + def _write_row(self, col_keys): + row_values = self._get_values(col_keys) + row = "\t".join(row_values) + self._write_line(row) + + +class FeatureHullHandler(OpenMsContentHandler): + record_values = { + "feature": ["id"], + "convexhull": ["nr"], + "pt": ["x", "y"] + } + + def __init__(self, output): + self.output = output + + def _handleElement(self, name): + if name == "pt": + self._write_point() + + def _write_point(self): + col_keys = ["id", "nr", "x", "y"] + self._write_row(col_keys) + + +class PeptideHandler(OpenMsContentHandler): + record_values = { + "IdentificationRun": ["search_engine"], + "PeptideIdentification": ["score_type", "significance_threshold", "MZ", "RT"], + "PeptideHit": ["score", "sequence", "charge"], + } + + def __init__(self, output): + self.output = output + + def _handleElement(self, name): + if name == "PeptideHit": + self._write_peptide() + + def _write_peptide(self): + col_keys = ["score", "sequence", "score_type", "charge", "MZ", "RT"] + self._write_row(col_keys) + + +handlers = { + "peptide": PeptideHandler, + "feature_hull": FeatureHullHandler, +} + + +def _parse_args(): + parser = OptionParser() + parser.add_option("--input", dest="input") + parser.add_option("--output", dest="output") + parser.add_option("--type", dest="type", choices=["peptide", "feature_hull"]) + return parser.parse_args() + +if __name__ == "__main__": + main()