Mercurial > repos > galaxyp > maxquant
comparison augment_maxquant_mods.py @ 0:d4b6c9eae635 draft
Initial commit.
| author | galaxyp |
|---|---|
| date | Fri, 10 May 2013 17:22:51 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d4b6c9eae635 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 Usage: | |
| 4 python augment_maxquant_mods.py | |
| 5 | |
| 6 Assuming Unimod XML file (unimod.xml) and stock MaxQuant modifications | |
| 7 file (modifications.xml) are in this same directory, this script will | |
| 8 create a new MaxQuant modifications file (extended_modifications.xml) | |
| 9 with a new modification for each unimod entry. These new entries | |
| 10 will be suffixed with [Unimod] to distinguish them from existing | |
| 11 MaxQuant entries. This file should be copied to | |
| 12 <MaxQuant Path>\bin\conf\modifications.xml | |
| 13 | |
| 14 """ | |
| 15 import xml.etree.ElementTree as ET | |
| 16 import re | |
| 17 | |
# Fixed timestamp written into the create_date and last_modified_date
# attributes of every generated modification entry.
FAKE_DATE = "2012-06-11T21:21:24.4946343+02:00"

# Maps the "position" attribute of a Unimod specificity to the text stored in
# the <position> child of a MaxQuant modification.
POSITION_MAP = {
"Anywhere": "anywhere",
"Any N-term": "anyNterm",
"Any C-term": "anyCterm",
"Protein N-term": "proteinNterm",
"Protein C-term": "proteinCterm",
}

# Both input files are opened by relative path as soon as the module is run.
unimod_tree = ET.parse('unimod.xml')
# Namespace prefix (in "{uri}" form) prepended to every Unimod tag lookup.
unimod_ns = '{http://www.unimod.org/xmlns/schema/unimod_2}'
# The <modifications> child of the Unimod root; its <mod> children are read later.
unimod_modifications_el = unimod_tree.getroot().find('%smodifications' % unimod_ns)
# The MaxQuant tree is extended in place and written back out under a new name.
mq_tree = ET.parse("modifications.xml")
mq_root = mq_tree.getroot()
| 33 | |
| 34 | |
def to_label(title, site):
    """
    Build the display label for a modification title and site.

    The result has the form ``"<title> (<site>) [Unimod]"``.
    """
    label_parts = (title, site)
    return "%s (%s) [Unimod]" % label_parts
| 37 | |
| 38 | |
def copy_modification(unimod_modification):
    """
    Decide whether a Unimod entry should be copied into the MaxQuant file.

    Returns False for hidden entries, for entries without a delta element and
    for entries whose composition contains a multi-character symbol that has
    no replacement in ``COMP_REPLACES``; returns True otherwise.
    """
    if unimod_modification.hidden or unimod_modification.delta_el is None:
        return False
    # Complex stuff like Hep, that I cannot translate into MaxQuant.
    has_untranslatable_symbol = any(
        len(symbol) > 1 and symbol not in COMP_REPLACES
        for symbol, _count in unimod_modification.composition_array
    )
    return not has_untranslatable_symbol
| 50 | |
| 51 | |
# Unimod isotope symbols and the symbols substituted for them when a
# composition string is converted for MaxQuant.
COMP_REPLACES = {
    "15N": "Nx",
    "13C": "Cx",
    "18O": "Ox",
    "2H": "Hx",
}

## HEP?


def convert_composition(unimod_composition):
    """
    Convert Unimod representation of composition to MaxQuant.

    Every occurrence of a key of ``COMP_REPLACES`` in ``unimod_composition``
    is replaced by the corresponding value. The converted string is printed
    and then returned.
    """
    composition = unimod_composition
    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for key, value in COMP_REPLACES.items():
        composition = composition.replace(key, value)
    # Call form of print with a single argument behaves the same on Python 2 and 3.
    print(composition)
    return composition
| 71 | |
| 72 | |
def populate_modification(modification, unimod_modification):
    """
    Fill the MaxQuant element ``modification`` from ``unimod_modification``.

    Sets the bookkeeping attributes (dates, reporter corrections, user), the
    title/description/composition attributes, and appends ``position``,
    ``modification_site`` and ``type`` child elements. Returns the same
    ``modification`` element that was passed in.
    """
    fields = modification.attrib
    fields["create_date"] = fields["last_modified_date"] = FAKE_DATE
    for correction_key in (
        "reporterCorrectionM1",
        "reporterCorrectionM2",
        "reporterCorrectionP1",
        "reporterCorrectionP2",
    ):
        fields[correction_key] = "0"
    fields["user"] = "build_mods_script"
    fields["title"] = fields["description"] = unimod_modification.label
    fields["composition"] = convert_composition(unimod_modification.raw_composition)

    # An unknown Unimod position surfaces as a KeyError from this lookup.
    position_text = POSITION_MAP[unimod_modification.position]
    assert position_text is not None
    ET.SubElement(modification, "position").text = position_text

    site_el = ET.SubElement(modification, "modification_site")
    site_el.set("index", "0")
    site = unimod_modification.site
    # Sites longer than one character are written as "-".
    if len(site) > 1:
        site = "-"
    site_el.set("site", site)

    ET.SubElement(modification, "type").text = "standard"
    return modification
| 101 | |
| 102 | |
class UnimodModification(object):
    """
    A Unimod ``mod`` element paired with one of its ``specificity`` children.

    ``modification`` and ``specificity`` are ElementTree elements; all
    properties read from their attributes or child elements on access.
    """

    # One composition token: a symbol optionally followed by "(count)",
    # where count may be negative.
    _TOKEN_RE = re.compile(r"(\w+)(\((-?\d+)\))?")

    def __init__(self, modification, specificity):
        self.modification = modification
        self.specificity = specificity

    @property
    def title(self):
        """The ``title`` attribute of the modification element."""
        return self.modification.attrib["title"]

    @property
    def site(self):
        """The ``site`` attribute of the specificity element."""
        return self.specificity.attrib["site"]

    @property
    def label(self):
        """Label of the form ``"<title> (<site>) [Unimod]"``."""
        return "%s (%s) [Unimod]" % (self.title, self.site)

    @property
    def delta_el(self):
        """The namespaced ``delta`` child of the modification, or None."""
        return self.modification.find("%sdelta" % unimod_ns)

    @property
    def raw_composition(self):
        """The ``composition`` attribute of the delta element, unparsed."""
        return self.delta_el.attrib["composition"]

    @property
    def composition_array(self):
        """
        The composition as a list of ``(symbol, count)`` tuples.

        ``count`` is always an int; a token without an explicit
        ``(count)`` gets 1. An empty or whitespace-only composition yields
        an empty list.

        Raises:
            ValueError: if a token does not start with a symbol.
        """
        comp_array = []
        # str.split() with no argument drops empty tokens, so leading/trailing
        # whitespace and an empty composition no longer produce a failed match.
        for token in self.raw_composition.split():
            match = self._TOKEN_RE.match(token)
            if match is None:
                raise ValueError(
                    "Cannot parse composition token %r in %r"
                    % (token, self.raw_composition)
                )
            count = match.group(3)
            comp_array.append((match.group(1), int(count) if count else 1))
        return comp_array

    @property
    def position(self):
        """The ``position`` attribute of the specificity element."""
        return self.specificity.attrib["position"]

    @property
    def hidden(self):
        """True when the specificity's ``hidden`` attribute is set."""
        # NOTE(review): "1" is accepted alongside "true" on the assumption that
        # the attribute is an XML Schema boolean -- confirm against the
        # unimod.xml in use.
        return self.specificity.attrib["hidden"] in ("true", "1")
| 148 | |
# Build one UnimodModification per (mod, specificity) pair found in the
# Unimod <modifications> element.
unimod_modifications = []
for mod in unimod_modifications_el.findall('%smod' % unimod_ns):
    for specificity in mod.findall('%sspecificity' % unimod_ns):
        unimod_modifications.append(UnimodModification(mod, specificity))

# Find the highest index already used in the MaxQuant file so that new
# entries continue the numbering.
max_index = 0
# Iterate the element directly: Element.getchildren() was removed in
# Python 3.9, while direct iteration works on Python 2.7 and 3.
for modification in mq_root:
    index = int(modification.attrib["index"])
    max_index = max(max_index, index)

# Append a new <modification> for every Unimod entry that can be translated.
for unimod_modification in unimod_modifications:
    if copy_modification(unimod_modification):
        # Call form of print with a single argument works on Python 2 and 3.
        print(unimod_modification.composition_array)
        max_index += 1
        modification = ET.SubElement(mq_root, "modification", attrib={"index": str(max_index)})
        populate_modification(modification, unimod_modification)

mq_tree.write("extended_modifications.xml")
