Mercurial > repos > galaxyp > maxquant
comparison augment_maxquant_mods.py @ 0:d4b6c9eae635 draft
Initial commit.
author | galaxyp |
---|---|
date | Fri, 10 May 2013 17:22:51 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d4b6c9eae635 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Usage: | |
4 python augment_maxquant_mods.py | |
5 | |
6 Assuming Unimod XML file (unimod.xml) and stock MaxQuant modifications | |
7 file (modifications.xml) are in this same directory, this script will | |
8 create a new MaxQuant modifications file (extended_modifications.xml) | |
9 with a new modification for each unimod entry. These new entries | |
10 will be suffixed with [Unimod] to distinguish them from existing | |
11 MaxQuant entries. This file should be copied to | |
12 <MaxQuant Path>\\bin\\conf\\modifications.xml | |
13 | |
14 """ | |
15 import xml.etree.ElementTree as ET | |
16 import re | |
17 | |
# Fixed timestamp stamped into the create/last-modified date attributes of
# every entry this script generates.
FAKE_DATE = "2012-06-11T21:21:24.4946343+02:00"

# Unimod position name -> MaxQuant position name.
POSITION_MAP = dict((
    ("Anywhere", "anywhere"),
    ("Any N-term", "anyNterm"),
    ("Any C-term", "anyCterm"),
    ("Protein N-term", "proteinNterm"),
    ("Protein C-term", "proteinCterm"),
))
27 | |
# Namespace prefix, in ElementTree's {uri}tag notation, used for every
# Unimod element lookup in this script.
unimod_ns = '{http://www.unimod.org/xmlns/schema/unimod_2}'

# Both input files are opened by bare file name, i.e. relative to the
# current working directory.
unimod_tree = ET.parse('unimod.xml')
unimod_modifications_el = unimod_tree.getroot().find('%smodifications' % unimod_ns)
if unimod_modifications_el is None:
    # find() returns None when nothing matches; fail here with a clear
    # message instead of an AttributeError on the later findall() call.
    raise ValueError(
        "unimod.xml has no 'modifications' element in namespace %s" % unimod_ns
    )

mq_tree = ET.parse("modifications.xml")
mq_root = mq_tree.getroot()
33 | |
34 | |
def to_label(title, site):
    """
    Build the label for a Unimod-derived entry: the title, the site in
    parentheses, and a trailing ``[Unimod]`` marker.
    """
    label_parts = (title, site)
    return "%s (%s) [Unimod]" % label_parts
37 | |
38 | |
def copy_modification(unimod_modification):
    """
    Decide whether ``unimod_modification`` should be copied into the
    MaxQuant modifications file.

    Returns False for hidden entries, for entries without a delta element,
    and for entries whose composition contains a multi-character component
    that has no replacement in ``COMP_REPLACES``; returns True otherwise.
    """
    if unimod_modification.hidden or unimod_modification.delta_el is None:
        return False
    # Multi-character components without a known replacement are complex
    # stuff like Hep that cannot be translated into MaxQuant.
    untranslatable = any(
        len(component) > 1 and component not in COMP_REPLACES
        for component, _count in unimod_modification.composition_array
    )
    return not untranslatable
50 | |
51 | |
# Unimod component name -> MaxQuant component name for the multi-character
# components this script knows how to translate.
COMP_REPLACES = {
    "15N": "Nx",
    "13C": "Cx",
    "18O": "Ox",
    "2H": "Hx",
}

## HEP?


def convert_composition(unimod_composition):
    """
    Convert Unimod representation of composition to MaxQuant

    Every occurrence of a ``COMP_REPLACES`` key in ``unimod_composition``
    is replaced by the corresponding value.  The converted string is also
    printed to standard output before it is returned.
    """
    composition = unimod_composition
    # items() and the print() call form both work on Python 2 and 3,
    # unlike iteritems() and the print statement.
    for key, value in COMP_REPLACES.items():
        composition = composition.replace(key, value)
    print(composition)
    return composition
71 | |
72 | |
def populate_modification(modification, unimod_modification):
    """
    Copy unimod entry ``unimod_modification`` to MaxQuant entry ``modification``.

    Sets the attributes of ``modification`` and appends ``position``,
    ``modification_site`` and ``type`` child elements to it.

    Returns the same ``modification`` element.
    Raises ``KeyError`` when the Unimod position has no entry in
    ``POSITION_MAP``.
    """
    # Read everything from the Unimod entry before touching the element,
    # so a failure here does not leave it half-populated.
    unimod_position = unimod_modification.position
    try:
        maxquant_position = POSITION_MAP[unimod_position]
    except KeyError:
        # The lookup either yields a mapped string or raises, so an
        # explicit error replaces the former ``assert ... != None``.
        raise KeyError(
            "No MaxQuant position known for Unimod position %r" % (unimod_position,)
        )
    label = unimod_modification.label
    composition = convert_composition(unimod_modification.raw_composition)
    unimod_site = unimod_modification.site

    attrib = modification.attrib
    attrib["create_date"] = FAKE_DATE
    attrib["last_modified_date"] = FAKE_DATE
    for correction in ("M1", "M2", "P1", "P2"):
        attrib["reporterCorrection%s" % correction] = str(0)
    attrib["user"] = "build_mods_script"
    attrib["title"] = label
    attrib["description"] = label
    attrib["composition"] = composition

    position_el = ET.SubElement(modification, "position")
    position_el.text = maxquant_position

    modification_site_el = ET.SubElement(modification, "modification_site")
    modification_site_el.attrib["index"] = "0"
    # Sites longer than one character are written as "-".
    modification_site_el.attrib["site"] = "-" if len(unimod_site) > 1 else unimod_site

    type_el = ET.SubElement(modification, "type")
    type_el.text = "standard"
    return modification
101 | |
102 | |
class UnimodModification(object):
    """
    Read-only view over one Unimod ``mod`` element combined with one of
    its ``specificity`` child elements.
    """

    # One composition token: a name, optionally followed by a signed
    # integer count in parentheses, e.g. ``H(-3)`` or ``13C``.
    _TOKEN_RE = re.compile(r"(\w+)(\((-?\d+)\))?")

    def __init__(self, modification, specificity):
        self.modification = modification
        self.specificity = specificity

    @property
    def title(self):
        """The ``title`` attribute of the mod element."""
        return self.modification.attrib["title"]

    @property
    def site(self):
        """The ``site`` attribute of the specificity element."""
        return self.specificity.attrib["site"]

    @property
    def label(self):
        """Title and site combined, marked with a ``[Unimod]`` suffix."""
        return "%s (%s) [Unimod]" % (self.title, self.site)

    @property
    def delta_el(self):
        """The mod's ``delta`` child element, or None if it has none."""
        return self.modification.find("%sdelta" % unimod_ns)

    @property
    def raw_composition(self):
        """The ``composition`` attribute of the delta element."""
        return self.delta_el.attrib["composition"]

    @property
    def composition_array(self):
        """
        The composition as a list of ``(name, count)`` tuples.

        ``count`` is always an ``int``; a token without an explicit count
        counts as 1.  An empty composition gives an empty list.  Raises
        ``ValueError`` for a token that does not start with a name.
        """
        comp_array = []
        # str.split() never yields empty tokens, so blank or padded
        # compositions no longer trip over a failed match.
        for token in self.raw_composition.split():
            match = self._TOKEN_RE.match(token)
            if match is None:
                raise ValueError("Cannot parse composition token %r" % (token,))
            count = match.group(3)
            comp_array.append((match.group(1), 1 if count is None else int(count)))
        return comp_array

    @property
    def position(self):
        """The ``position`` attribute of the specificity element."""
        return self.specificity.attrib["position"]

    @property
    def hidden(self):
        """
        True when the specificity is flagged hidden.

        Both xs:boolean spellings of true ("true" and "1") are accepted;
        a missing attribute is treated as not hidden.
        """
        return self.specificity.attrib.get("hidden") in ("true", "1")
148 | |
# One UnimodModification per (mod, specificity) pair found in the Unimod
# file.
unimod_modifications = []
for mod in unimod_modifications_el.findall('%smod' % unimod_ns):
    for specificity in mod.findall('%sspecificity' % unimod_ns):
        unimod_modifications.append(UnimodModification(mod, specificity))

# Highest index already used in the stock MaxQuant file; new entries are
# numbered after it.  The element is iterated directly because
# Element.getchildren() no longer exists in Python 3.9+.
max_index = 0
for modification in mq_root:
    index = int(modification.attrib["index"])
    max_index = max(max_index, index)

# Append a new MaxQuant entry for every Unimod entry accepted by
# copy_modification().
for unimod_modification in unimod_modifications:
    if copy_modification(unimod_modification):
        # Call form of print works on both Python 2 and 3.
        print(unimod_modification.composition_array)
        max_index += 1
        modification = ET.SubElement(mq_root, "modification", attrib={"index": str(max_index)})
        populate_modification(modification, unimod_modification)

mq_tree.write("extended_modifications.xml")