Mercurial > repos > galaxyp > maxquant
diff augment_maxquant_mods.py @ 0:d4b6c9eae635 draft
Initial commit.
author | galaxyp |
---|---|
date | Fri, 10 May 2013 17:22:51 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
r"""
Usage:
  python augment_maxquant_mods.py

Assuming Unimod XML file (unimod.xml) and stock MaxQuant modifications
file (modifications.xml) are in this same directory, this script will
create a new MaxQuant modifications file (extended_modifications.xml)
with a new modification for each unimod entry. These new entries
will be suffixed with [Unimod] to distinguish them from existing
MaxQuant entries. This file should be copied to
<MaxQuant Path>\bin\conf\modifications.xml

"""
# NOTE: the docstring above is a raw string on purpose; otherwise ``\b`` in
# the Windows path would be interpreted as a backspace character.
import xml.etree.ElementTree as ET
import re

# Timestamp written into the create/last-modified attributes of every
# generated MaxQuant entry.
FAKE_DATE = "2012-06-11T21:21:24.4946343+02:00"

# Unimod ``position`` attribute value -> MaxQuant ``position`` element text.
POSITION_MAP = {
    "Anywhere": "anywhere",
    "Any N-term": "anyNterm",
    "Any C-term": "anyCterm",
    "Protein N-term": "proteinNterm",
    "Protein C-term": "proteinCterm",
}

# Unimod isotope symbols -> replacement symbols used when converting a
# composition string for MaxQuant.
COMP_REPLACES = {
    "15N": "Nx",
    "13C": "Cx",
    "18O": "Ox",
    "2H": "Hx",
}

## HEP?

# Both input files are read relative to the current working directory.
unimod_tree = ET.parse('unimod.xml')
unimod_ns = '{http://www.unimod.org/xmlns/schema/unimod_2}'
unimod_modifications_el = unimod_tree.getroot().find('%smodifications' % unimod_ns)
mq_tree = ET.parse("modifications.xml")
mq_root = mq_tree.getroot()


def to_label(title, site):
    """Return the MaxQuant title used for a Unimod entry: ``title (site) [Unimod]``."""
    return "%s (%s) [Unimod]" % (title, site)


def copy_modification(unimod_modification):
    """
    Decide whether ``unimod_modification`` should be copied to MaxQuant.

    Returns False for hidden entries, for entries without a ``delta``
    element, and for entries whose composition contains a multi-character
    symbol that has no entry in ``COMP_REPLACES``. Returns True otherwise.
    """
    if unimod_modification.hidden:
        return False
    if unimod_modification.delta_el is None:
        return False
    for aa, _count in unimod_modification.composition_array:
        if len(aa) > 1 and aa not in COMP_REPLACES:
            # Complex stuff like Hep, that I cannot translate into MaxQuant.
            return False
    return True
def convert_composition(unimod_composition):
    """
    Convert Unimod representation of composition to MaxQuant.

    Every key of ``COMP_REPLACES`` found in ``unimod_composition`` is
    replaced by its mapped value. The converted string is printed and
    returned.
    """
    composition = unimod_composition
    # .items() works on both Python 2 and 3 (.iteritems() is Python 2 only).
    for unimod_symbol, maxquant_symbol in COMP_REPLACES.items():
        composition = composition.replace(unimod_symbol, maxquant_symbol)
    print(composition)
    return composition


def populate_modification(modification, unimod_modification):
    """
    Copy unimod entry ``unimod_modification`` to MaxQuant entry ``modification``.

    Sets the attributes of ``modification`` and appends ``position``,
    ``modification_site`` and ``type`` child elements to it.

    Returns ``modification``.
    Raises KeyError if the Unimod position has no entry in ``POSITION_MAP``.
    """
    attrib = modification.attrib
    attrib["create_date"] = FAKE_DATE
    attrib["last_modified_date"] = FAKE_DATE
    for correction in ("reporterCorrectionM1", "reporterCorrectionM2",
                       "reporterCorrectionP1", "reporterCorrectionP2"):
        attrib[correction] = "0"
    attrib["user"] = "build_mods_script"
    label = unimod_modification.label
    attrib["title"] = label
    attrib["description"] = label
    attrib["composition"] = convert_composition(unimod_modification.raw_composition)

    unimod_position = unimod_modification.position
    if unimod_position not in POSITION_MAP:
        # Same exception type as the bare dict lookup, but with a message that
        # names the offending entry.
        raise KeyError("No MaxQuant position for Unimod position %r (%s)"
                       % (unimod_position, label))
    position_el = ET.SubElement(modification, "position")
    position_el.text = POSITION_MAP[unimod_position]

    modification_site_el = ET.SubElement(modification, "modification_site")
    modification_site_el.attrib["index"] = "0"
    unimod_site = unimod_modification.site
    # Sites longer than one character are written as "-".
    modification_site_el.attrib["site"] = "-" if len(unimod_site) > 1 else unimod_site

    type_el = ET.SubElement(modification, "type")
    type_el.text = "standard"
    return modification


# One composition token: a symbol optionally followed by "(<signed integer>)".
_COMPOSITION_TOKEN_RE = re.compile(r"(\w+)(\((-?\d+)\))?")


class UnimodModification(object):
    """
    One (``mod``, ``specificity``) element pair read from the Unimod XML.
    """

    def __init__(self, modification, specificity):
        self.modification = modification
        self.specificity = specificity

    @property
    def title(self):
        """The ``title`` attribute of the ``mod`` element."""
        return self.modification.attrib["title"]

    @property
    def site(self):
        """The ``site`` attribute of the ``specificity`` element."""
        return self.specificity.attrib["site"]

    @property
    def label(self):
        """Title used for the generated MaxQuant entry."""
        return "%s (%s) [Unimod]" % (self.title, self.site)

    @property
    def delta_el(self):
        """The ``delta`` child of the ``mod`` element, or None if absent."""
        return self.modification.find("%sdelta" % unimod_ns)

    @property
    def raw_composition(self):
        """The ``composition`` attribute of the ``delta`` element."""
        return self.delta_el.attrib["composition"]

    @property
    def composition_array(self):
        """
        The composition as a list of ``(symbol, count)`` tuples.

        ``count`` is always an int; a token without an explicit
        ``(count)`` gets 1. An empty composition yields an empty list.
        Raises ValueError for a token that does not start with a symbol.
        """
        comp_array = []
        # str.split() drops empty tokens, so empty or whitespace-padded
        # compositions no longer produce a '' token that fails to match.
        for aa_and_count in self.raw_composition.split():
            match = _COMPOSITION_TOKEN_RE.match(aa_and_count)
            if match is None:
                raise ValueError("Cannot parse composition token %r in %r"
                                 % (aa_and_count, self.raw_composition))
            raw_count = match.group(3)
            count = 1 if raw_count is None else int(raw_count)
            comp_array.append((match.group(1), count))
        return comp_array

    @property
    def position(self):
        """The ``position`` attribute of the ``specificity`` element."""
        return self.specificity.attrib["position"]

    @property
    def hidden(self):
        """True if the ``specificity`` element is flagged as hidden."""
        # XML Schema booleans may be written "true" or "1".
        # NOTE(review): confirm which form the unimod.xml in use employs.
        return self.specificity.attrib.get("hidden", "") in ("true", "1")


# Build one UnimodModification per (mod, specificity) pair.
unimod_modifications = []
for mod in unimod_modifications_el.findall('%smod' % unimod_ns):
    for specificity in mod.findall('%sspecificity' % unimod_ns):
        unimod_modifications.append(UnimodModification(mod, specificity))

# Find the highest index already present so new entries continue from it.
max_index = 0
# Iterating the element directly replaces Element.getchildren(), which was
# removed in Python 3.9.
for modification in mq_root:
    index = int(modification.attrib["index"])
    max_index = max(max_index, index)

for unimod_modification in unimod_modifications:
    if copy_modification(unimod_modification):
        print(unimod_modification.composition_array)
        max_index += 1
        modification = ET.SubElement(mq_root, "modification", attrib={"index": str(max_index)})
        populate_modification(modification, unimod_modification)

mq_tree.write("extended_modifications.xml")