0
|
1 #!/usr/bin/env python
|
|
2 """
|
|
3 Usage:
|
|
4 python augment_maxquant_mods.py
|
|
5
|
|
6 Assuming Unimod XML file (unimod.xml) and stock MaxQuant modifications
|
|
7 file (modifications.xml) are in this same directory, this script will
|
|
8 create a new MaxQuant modifications file (extended_modifications.xml)
|
|
9 with a new modification for each Unimod entry. These new entries
|
|
10 will be suffixed with [Unimod] to distinguish them from existing
|
|
11 MaxQuant entries. This file should be copied to
|
|
12 <MaxQuant Path>\bin\conf\modifications.xml
|
|
13
|
|
14 """
|
|
15 import xml.etree.ElementTree as ET
|
|
16 import re
|
|
17
|
|
# Fixed timestamp written into the create_date / last_modified_date
# attributes of every generated MaxQuant entry.
FAKE_DATE = "2012-06-11T21:21:24.4946343+02:00"

# Unimod position name -> MaxQuant position identifier.
POSITION_MAP = dict([
    ("Anywhere", "anywhere"),
    ("Any N-term", "anyNterm"),
    ("Any C-term", "anyCterm"),
    ("Protein N-term", "proteinNterm"),
    ("Protein C-term", "proteinCterm"),
])
|
|
27
|
|
# Namespace prefix (ElementTree "{uri}" form) for tags in the Unimod file.
unimod_ns = '{http://www.unimod.org/xmlns/schema/unimod_2}'

# Load the Unimod file and locate its <modifications> container element.
unimod_tree = ET.parse('unimod.xml')
unimod_root = unimod_tree.getroot()
unimod_modifications_el = unimod_root.find(unimod_ns + 'modifications')

# Load the stock MaxQuant file; new entries are appended under its root.
mq_tree = ET.parse("modifications.xml")
mq_root = mq_tree.getroot()
|
|
33
|
|
34
|
|
def to_label(title, site):
    """Return the label ``"<title> (<site>) [Unimod]"`` for a title/site pair."""
    template = "%s (%s) [Unimod]"
    return template % (title, site)
|
|
37
|
|
38
|
|
def copy_modification(unimod_modification):
    """
    Decide whether a Unimod entry should be copied into the MaxQuant file.

    Returns False when the entry is flagged hidden, when it has no delta
    element, or when its composition contains a multi-character symbol
    that COMP_REPLACES cannot translate; returns True otherwise.
    """
    if unimod_modification.hidden or unimod_modification.delta_el is None:
        return False
    # Multi-character symbols (complex stuff like Hep) can only be carried
    # over to MaxQuant when COMP_REPLACES has a translation for them.
    return all(
        len(symbol) <= 1 or symbol in COMP_REPLACES
        for symbol, _count in unimod_modification.composition_array
    )
|
|
50
|
|
51
|
|
# Unimod isotope symbols and the names substituted for them when a
# composition string is rewritten for MaxQuant.
COMP_REPLACES = {
    "15N": "Nx",
    "13C": "Cx",
    "18O": "Ox",
    "2H": "Hx",
}

# Open question: Hep (and similar multi-character symbols) has no entry here.


def convert_composition(unimod_composition):
    """
    Convert Unimod representation of composition to MaxQuant.

    Every occurrence of a COMP_REPLACES key in ``unimod_composition`` is
    replaced by its mapped value; the rest of the string is left as is.
    The converted string is printed and then returned.
    """
    composition = unimod_composition
    # .items() and print(...) are valid on both Python 2 and Python 3,
    # unlike .iteritems() and the print statement.
    for unimod_symbol, maxquant_symbol in COMP_REPLACES.items():
        composition = composition.replace(unimod_symbol, maxquant_symbol)
    print(composition)
    return composition
|
|
71
|
|
72
|
|
def populate_modification(modification, unimod_modification):
    """
    Copy unimod entry ``unimod_modification`` to MaxQuant entry ``modification``.

    Sets the date, reporter-correction, user, title, description and
    composition attributes on ``modification`` and appends ``position``,
    ``modification_site`` and ``type`` child elements to it.

    Returns the same ``modification`` element that was passed in.

    Raises KeyError if the Unimod position has no entry in POSITION_MAP.
    """
    # Resolve the position before touching the element so an unsupported
    # entry fails with a clear message instead of a half-populated element.
    unimod_position = unimod_modification.position
    try:
        maxquant_position = POSITION_MAP[unimod_position]
    except KeyError:
        raise KeyError("Unsupported Unimod position: %r" % (unimod_position,))

    attrib = modification.attrib
    attrib["create_date"] = FAKE_DATE
    attrib["last_modified_date"] = FAKE_DATE
    for reporter_key in ("reporterCorrectionM1", "reporterCorrectionM2",
                         "reporterCorrectionP1", "reporterCorrectionP2"):
        attrib[reporter_key] = "0"
    attrib["user"] = "build_mods_script"
    label = unimod_modification.label
    attrib["title"] = label
    attrib["description"] = label
    attrib["composition"] = convert_composition(unimod_modification.raw_composition)

    position_el = ET.SubElement(modification, "position")
    position_el.text = maxquant_position

    modification_site_el = ET.SubElement(modification, "modification_site")
    modification_site_el.attrib["index"] = "0"
    unimod_site = unimod_modification.site
    # Sites longer than one character are written as "-".
    modification_site_el.attrib["site"] = "-" if len(unimod_site) > 1 else unimod_site

    type_el = ET.SubElement(modification, "type")
    type_el.text = "standard"
    return modification
|
|
101
|
|
102
|
|
class UnimodModification(object):
    """
    One (modification, specificity) pair of elements from the Unimod XML.

    ``modification`` is a ``mod`` element and ``specificity`` is one of its
    ``specificity`` children; the properties expose the attributes of those
    two elements that the conversion needs.
    """

    # One composition token: a symbol optionally followed by a signed count
    # in parentheses, e.g. ``H``, ``2H(3)`` or ``O(-1)``.
    _TOKEN_RE = re.compile(r"(\w+)(\((-?\d+)\))?")

    def __init__(self, modification, specificity):
        self.modification = modification
        self.specificity = specificity

    @property
    def title(self):
        """The ``title`` attribute of the modification element."""
        return self.modification.attrib["title"]

    @property
    def site(self):
        """The ``site`` attribute of the specificity element."""
        return self.specificity.attrib["site"]

    @property
    def label(self):
        """Label of the form ``"<title> (<site>) [Unimod]"``."""
        return "%s (%s) [Unimod]" % (self.title, self.site)

    @property
    def delta_el(self):
        """The namespaced ``delta`` child of the modification, or None."""
        return self.modification.find("%sdelta" % unimod_ns)

    @property
    def raw_composition(self):
        """The ``composition`` attribute of the delta element, unparsed."""
        return self.delta_el.attrib["composition"]

    @property
    def composition_array(self):
        """
        The composition parsed into a list of ``(symbol, count)`` tuples.

        ``count`` is always an int; a token without a parenthesised count
        counts as 1. An empty composition yields an empty list.

        Raises ValueError if a token does not start with a symbol.
        """
        comp_array = []
        # str.split() with no argument discards empty tokens, so an empty
        # or whitespace-padded composition produces no entries.
        for token in self.raw_composition.split():
            match = self._TOKEN_RE.match(token)
            if match is None:
                raise ValueError("Unrecognised composition token: %r" % (token,))
            count_text = match.group(3)
            count = int(count_text) if count_text is not None else 1
            comp_array.append((match.group(1), count))
        return comp_array

    @property
    def position(self):
        """The ``position`` attribute of the specificity element."""
        return self.specificity.attrib["position"]

    @property
    def hidden(self):
        """True when the specificity's ``hidden`` attribute is set."""
        # An XML boolean may be spelled "true" or "1"; accept both.
        # NOTE(review): confirm which spelling the unimod.xml in use has.
        return self.specificity.attrib["hidden"] in ("true", "1")
|
|
148
|
|
# Pair every Unimod <mod> element with each of its <specificity> children;
# each pair is a candidate MaxQuant entry.
unimod_modifications = []
for mod in unimod_modifications_el.findall('%smod' % unimod_ns):
    for specificity in mod.findall('%sspecificity' % unimod_ns):
        unimod_modifications.append(UnimodModification(mod, specificity))

# Find the highest index already used in the MaxQuant file so the new
# entries continue from it. Iterating the element yields its children;
# Element.getchildren() no longer exists on Python 3.9+.
max_index = 0
for modification in mq_root:
    max_index = max(max_index, int(modification.attrib["index"]))

# Append one new MaxQuant <modification> per Unimod entry that passes the
# copy_modification filter.
for unimod_modification in unimod_modifications:
    if not copy_modification(unimod_modification):
        continue
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(unimod_modification.composition_array)
    max_index += 1
    modification = ET.SubElement(mq_root, "modification", attrib={"index": str(max_index)})
    populate_modification(modification, unimod_modification)

mq_tree.write("extended_modifications.xml")
|