Mercurial > repos > galaxyp > maxquant
comparison maxquant_wrapper.py @ 0:d4b6c9eae635 draft
Initial commit.
author | galaxyp |
---|---|
date | Fri, 10 May 2013 17:22:51 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d4b6c9eae635 |
---|---|
1 #!/usr/bin/env python | |
2 import optparse | |
3 import os | |
4 import shutil | |
5 import sys | |
6 import tempfile | |
7 import subprocess | |
8 import logging | |
9 from string import Template | |
10 from xml.sax.saxutils import escape | |
11 import xml.etree.ElementTree as ET | |
12 | |
# Logger named after this module.
log = logging.getLogger(__name__)

DEBUG = True

# Directory the script was started in; the scratch files below live here.
working_directory = os.getcwd()

# Names of the files that capture the child process' stderr and stdout.
# Only the generated *names* are kept: the NamedTemporaryFile objects are
# not retained, so execute() re-creates the files by name when it runs.
tmp_stderr_name, tmp_stdout_name = [
    tempfile.NamedTemporaryFile(dir=working_directory, suffix=extension).name
    for extension in ('.stderr', '.stdout')
]
20 | |
21 | |
def stop_err(msg):
    """Report a fatal error and terminate the process.

    Writes ``msg`` followed by a newline to stderr, then exits with status 1
    so that the failure is also visible through the exit code.

    Raises:
        SystemExit: always.
    """
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)
25 | |
26 | |
def read_stderr(path=None):
    """Return the captured stderr text.

    Args:
        path: file to read; defaults to the module-level ``tmp_stderr_name``.

    Returns:
        The whole file content as a string, or ``''`` when the file does not
        exist.
    """
    if path is None:
        path = tmp_stderr_name
    if not os.path.exists(path):
        return ''
    # Binary mode avoids newline translation; the loop ends on the first
    # empty read, so a file whose size is an exact multiple of the buffer
    # size cannot spin forever.
    buffsize = 1048576
    chunks = []
    with open(path, 'rb') as tmp_stderr:
        while True:
            chunk = tmp_stderr.read(buffsize)
            if not chunk:
                break
            chunks.append(chunk)
    data = b''.join(chunks)
    if not isinstance(data, str):
        # Python 3: bytes must be decoded before they are used as text.
        data = data.decode('utf-8', 'replace')
    return data
40 | |
41 | |
def execute(command, stdin=None):
    """Run ``command`` through the shell and capture its output in files.

    stderr and stdout of the child are redirected to the files named by the
    module-level ``tmp_stderr_name`` and ``tmp_stdout_name``. Whether or not
    the command succeeds, up to 64000 characters of each capture file are
    echoed to this process' stdout afterwards.

    Args:
        command: shell command line (a single string, run with shell=True).
        stdin: optional stdin passed straight to subprocess.Popen.

    Raises:
        Exception: if the command exits with a non-zero status; the message
            contains the exit code and the captured stderr.
    """
    try:
        with open(tmp_stderr_name, 'wb') as tmp_stderr:
            with open(tmp_stdout_name, 'wb') as tmp_stdout:
                # NOTE(review): shell=True with a string command - callers
                # must not interpolate untrusted text into ``command``.
                proc = subprocess.Popen(args=command, shell=True,
                                        stderr=tmp_stderr.fileno(),
                                        stdout=tmp_stdout.fileno(),
                                        stdin=stdin, env=os.environ)
                returncode = proc.wait()
        if returncode != 0:
            raise Exception("Program returned with non-zero exit code %d. stderr: %s" % (returncode, read_stderr()))
    finally:
        # Echo the captures; skip files that were never created so that a
        # failure above is not masked by a second error raised here.
        for captured in (tmp_stderr_name, tmp_stdout_name):
            if os.path.exists(captured):
                with open(captured, "r") as handle:
                    print(handle.read(64000))
53 | |
54 | |
def delete_file(path):
    """Remove the file at ``path`` on a best-effort basis.

    A missing file or any other OS-level failure is ignored; exceptions that
    are not OSError (for example KeyboardInterrupt) propagate.
    """
    try:
        os.remove(path)
    except OSError:
        pass
61 | |
62 | |
def delete_directory(directory):
    """Remove ``directory`` and everything below it on a best-effort basis.

    A missing directory or any other OS-level failure is ignored; exceptions
    that are not OSError (for example KeyboardInterrupt) propagate.
    """
    try:
        shutil.rmtree(directory)
    except OSError:
        pass
69 | |
70 | |
def symlink(source, link_name):
    """Make ``link_name`` refer to ``source``.

    On non-Windows platforms this is a plain os.symlink. On Windows a
    symbolic link is attempted through pywin32; if that is unavailable or
    fails, ``source`` is copied to ``link_name`` instead.

    Args:
        source: existing path the link should point at.
        link_name: path of the link (or copy) to create.
    """
    import platform
    if platform.system() != 'Windows':
        os.symlink(source, link_name)
        return
    try:
        import win32file
        # pywin32 takes the link name first and the target second; flag 1
        # marks a directory link, flag 0 a file link.
        flags = 1 if os.path.isdir(source) else 0
        win32file.CreateSymbolicLink(link_name, source, flags)
    except Exception:
        # No pywin32, missing privilege, unsupported filesystem, ...
        shutil.copy(source, link_name)
81 | |
82 | |
def copy_to_working_directory(data_file, relative_path):
    """Copy ``data_file`` to ``relative_path`` unless both are the same path.

    Returns:
        ``relative_path``, unchanged.
    """
    source_abs = os.path.abspath(data_file)
    target_abs = os.path.abspath(relative_path)
    already_in_place = source_abs == target_abs
    if not already_in_place:
        shutil.copy(data_file, relative_path)
    return relative_path
87 | |
88 | |
def __main__():
    """Script entry point: delegate all work to run_script()."""
    run_script()
91 | |
92 | |
93 ## Lock File Stuff | |
94 ## http://www.evanfosmark.com/2009/01/cross-platform-file-locking-support-in-python/ | |
95 import os | |
96 import time | |
97 import errno | |
98 | |
99 | |
class FileLockException(Exception):
    """Error raised when a file lock cannot be acquired before its timeout."""
102 | |
103 | |
class FileLock(object):
    """ A file locking mechanism that has context-manager support so
        you can use it in a with statement. This should be relatively cross
        compatible as it doesn't rely on msvcrt or fcntl for the locking.

        The lock is a separate ``<file_name>.lock`` file that is created
        exclusively; whoever manages to create it holds the lock.
    """

    def __init__(self, file_name, timeout=10, delay=.05):
        """ Prepare the file locker. Specify the file to lock and optionally
            the maximum timeout and the delay between each attempt to lock.

            ``timeout`` is in seconds; ``None`` means wait indefinitely.
            ``delay`` is the pause in seconds between two attempts.
        """
        self.is_locked = False
        # Descriptor of the open lock file while the lock is held.
        self.fd = None
        # os.path.join keeps ``file_name`` as-is when it is already absolute.
        self.lockfile = os.path.join(os.getcwd(), "%s.lock" % file_name)
        self.file_name = file_name
        self.timeout = timeout
        self.delay = delay

    def acquire(self):
        """ Acquire the lock, if possible. If the lock is in use, it checks
            again every `delay` seconds. It does this until it either gets
            the lock or exceeds `timeout` number of seconds, in which case
            it raises FileLockException. Any OSError other than "file
            exists" is re-raised unchanged.
        """
        start_time = time.time()
        while True:
            try:
                # O_CREAT | O_EXCL makes the creation fail if the lock file
                # already exists, which is what makes this a lock.
                self.fd = os.open(self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR)
                break
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
                timed_out = (self.timeout is not None and
                             (time.time() - start_time) >= self.timeout)
                if timed_out:
                    raise FileLockException("Timeout occured.")
                time.sleep(self.delay)
        self.is_locked = True

    def release(self):
        """ Get rid of the lock by deleting the lockfile.
            When working in a `with` statement, this gets automatically
            called at the end. Does nothing if the lock is not held.
        """
        if not self.is_locked:
            return
        # Reset the state first so that a failure below cannot lead to a
        # second close of the same descriptor from __del__.
        fd, self.fd = self.fd, None
        self.is_locked = False
        try:
            os.close(fd)
        finally:
            os.unlink(self.lockfile)

    def __enter__(self):
        """ Activated when used in the with statement.
            Should automatically acquire a lock to be used in the with block.
        """
        if not self.is_locked:
            self.acquire()
        return self

    def __exit__(self, type, value, traceback):
        """ Activated at the end of the with statement.
            It automatically releases the lock if it is still held.
        """
        if self.is_locked:
            self.release()

    def __del__(self):
        """ Make sure that the FileLock instance doesn't leave a lockfile
            lying around.
        """
        # getattr guards against instances whose __init__ did not complete.
        if getattr(self, "is_locked", False):
            self.release()
169 | |
# string.Template source for the MaxQuant parameter file (mqpar.xml).
# run_script() fills the $placeholders from the dictionary built by
# get_properties() / set_group_params(). The XML declaration has to stay at
# the very start of the string.
TEMPLATE = """<?xml version="1.0" encoding="utf-8"?>
<MaxQuantParams xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" runOnCluster="false" processFolder="$process_folder">
  $raw_file_info
  <experimentalDesignFilename/>
  <slicePeaks>$slice_peaks</slicePeaks>
  <tempFolder/>
  <ncores>$num_cores</ncores>
  <ionCountIntensities>false</ionCountIntensities>
  <maxFeatureDetectionCores>1</maxFeatureDetectionCores>
  <verboseColumnHeaders>false</verboseColumnHeaders>
  <minTime>NaN</minTime>
  <maxTime>NaN</maxTime>
  <calcPeakProperties>$calc_peak_properties</calcPeakProperties>
  <useOriginalPrecursorMz>$use_original_precursor_mz</useOriginalPrecursorMz>
  $fixed_mods
  <multiModificationSearch>$multi_modification_search</multiModificationSearch>
  <fastaFiles>$database</fastaFiles>
  <fastaFilesFirstSearch/>
  <fixedSearchFolder/>
  <advancedRatios>$advanced_ratios</advancedRatios>
  <rtShift>$rt_shift</rtShift>
  <fastLfq>$fast_lfq</fastLfq>
  <randomize>$randomize</randomize>
  <specialAas>$special_aas</specialAas>
  <includeContamiants>$include_contamiants</includeContamiants>
  <equalIl>$equal_il</equalIl>
  <topxWindow>$topx_window</topxWindow>
  <maxPeptideMass>$max_peptide_mass</maxPeptideMass>
  <reporterPif>$reporter_pif</reporterPif>
  <reporterFraction>$reporter_fraction</reporterFraction>
  <reporterBasePeakRatio>$reporter_base_peak_ratio</reporterBasePeakRatio>
  <scoreThreshold>$score_threshold</scoreThreshold>
  <filterAacounts>$filter_aacounts</filterAacounts>
  <secondPeptide>$second_peptide</secondPeptide>
  <matchBetweenRuns>$match_between_runs</matchBetweenRuns>
  <matchBetweenRunsFdr>$match_between_runs_fdr</matchBetweenRunsFdr>
  <reQuantify>$re_quantify</reQuantify>
  <dependentPeptides>$dependent_peptides</dependentPeptides>
  <dependentPeptideFdr>$dependent_peptide_fdr</dependentPeptideFdr>
  <dependentPeptideMassBin>$dependent_peptide_mass_bin</dependentPeptideMassBin>
  <labelFree>$label_free</labelFree>
  <lfqMinEdgesPerNode>$lfq_min_edges_per_node</lfqMinEdgesPerNode>
  <lfqAvEdgesPerNode>$lfq_av_edges_per_node</lfqAvEdgesPerNode>
  <hybridQuantification>$hybrid_quantification</hybridQuantification>
  <msmsConnection>$msms_connection</msmsConnection>
  <ibaq>$ibaq</ibaq>
  <msmsRecalibration>$msms_recalibration</msmsRecalibration>
  <ibaqLogFit>$ibaq_log_fit</ibaqLogFit>
  <razorProteinFdr>$razor_protein_fdr</razorProteinFdr>
  <calcSequenceTags>$calc_sequence_tags</calcSequenceTags>
  <deNovoVarMods>$de_novo_var_mods</deNovoVarMods>
  <massDifferenceSearch>$mass_difference_search</massDifferenceSearch>
  <minPepLen>$min_pep_len</minPepLen>
  <peptideFdr>$peptide_fdr</peptideFdr>
  <peptidePep>$peptide_pep</peptidePep>
  <proteinFdr>$protein_fdr</proteinFdr>
  <siteFdr>$site_fdr</siteFdr>
  <minPeptideLengthForUnspecificSearch>$min_peptide_length_for_unspecific_search</minPeptideLengthForUnspecificSearch>
  <maxPeptideLengthForUnspecificSearch>$max_peptide_length_for_unspecific_search</maxPeptideLengthForUnspecificSearch>
  <useNormRatiosForOccupancy>$use_norm_ratios_for_occupancy</useNormRatiosForOccupancy>
  <minPeptides>$min_peptides</minPeptides>
  <minRazorPeptides>$min_razor_peptides</minRazorPeptides>
  <minUniquePeptides>$min_unique_peptides</minUniquePeptides>
  <useCounterparts>$use_counterparts</useCounterparts>
  <minRatioCount>$min_ratio_count</minRatioCount>
  <lfqMinRatioCount>$lfq_min_ratio_count</lfqMinRatioCount>
  <restrictProteinQuantification>$restrict_protein_quantification</restrictProteinQuantification>
  $restrict_mods
  <matchingTimeWindow>$matching_time_window</matchingTimeWindow>
  <numberOfCandidatesMultiplexedMsms>$number_of_candidates_multiplexed_msms</numberOfCandidatesMultiplexedMsms>
  <numberOfCandidatesMsms>$number_of_candidates_msms</numberOfCandidatesMsms>
  <separateAasForSiteFdr>$separate_aas_for_site_fdr</separateAasForSiteFdr>
  <massDifferenceMods />
  <aifParams aifSilWeight="$aif_sil_weight"
             aifIsoWeight="$aif_iso_weight"
             aifTopx="$aif_topx"
             aifCorrelation="$aif_correlation"
             aifCorrelationFirstPass="$aif_correlation_first_pass"
             aifMinMass="$aif_min_mass"
             aifMsmsTol="$aif_msms_tol"
             aifSecondPass="$aif_second_pass"
             aifIterative="$aif_iterative"
             aifThresholdFdr="$aif_threhold_fdr" />
  <groups>
    <ParameterGroups>
      $group_params
    </ParameterGroups>
  </groups>
  <qcSettings>
    <qcSetting xsi:nil="true" />
  </qcSettings>
  <msmsParams>
    $ftms_fragment_settings
    $itms_fragment_settings
    $tof_fragment_settings
    $unknown_fragment_settings
  </msmsParams>
  <keepLowScoresMode>$keep_low_scores_mode</keepLowScoresMode>
  <msmsCentroidMode>$msms_centroid_mode</msmsCentroidMode>
  <quantMode>$quant_mode</quantMode>
  <siteQuantMode>$site_quant_mode</siteQuantMode>
  <groupParams>
    <groupParam>
      $group_params
    </groupParam>
  </groupParams>
</MaxQuantParams>
"""
278 | |
# string.Template fragment holding the per-group parameters.
# set_group_params() renders it and stores the result under "group_params",
# which the main TEMPLATE then embeds.
GROUP_TEMPLATE = """
  <maxCharge>$max_charge</maxCharge>
  <lcmsRunType>$lcms_run_type</lcmsRunType>
  <msInstrument>$ms_instrument</msInstrument>
  <groupIndex>$group_index</groupIndex>
  <maxLabeledAa>$max_labeled_aa</maxLabeledAa>
  <maxNmods>$max_n_mods</maxNmods>
  <maxMissedCleavages>$max_missed_cleavages</maxMissedCleavages>
  <multiplicity>$multiplicity</multiplicity>
  <protease>$protease</protease>
  <proteaseFirstSearch>$protease</proteaseFirstSearch>
  <useProteaseFirstSearch>false</useProteaseFirstSearch>
  <useVariableModificationsFirstSearch>false</useVariableModificationsFirstSearch>
  $variable_mods
  $isobaric_labels
  <variableModificationsFirstSearch>
    <string>Oxidation (M)</string>
    <string>Acetyl (Protein N-term)</string>
  </variableModificationsFirstSearch>
  <hasAdditionalVariableModifications>false</hasAdditionalVariableModifications>
  <additionalVariableModifications>
    <ArrayOfString />
  </additionalVariableModifications>
  <additionalVariableModificationProteins>
    <ArrayOfString />
  </additionalVariableModificationProteins>
  <doMassFiltering>$do_mass_filtering</doMassFiltering>
  <firstSearchTol>$first_search_tol</firstSearchTol>
  <mainSearchTol>$main_search_tol</mainSearchTol>
  $labels
"""
310 | |
311 # <labels> | |
312 # <string /> | |
313 # <string>Arg10; Lys8</string> | |
314 # </labels> | |
315 | |
def _fragment_profile(name, in_ppm, deisotope, topx, tolerance_value, tolerance_unit):
    """Build one fragment-spectrum settings dictionary (all values are strings)."""
    return {
        "InPpm": in_ppm,
        "Deisotope": deisotope,
        "Topx": topx,
        # These four flags are "true" for every analyzer type.
        "HigherCharges": "true",
        "IncludeWater": "true",
        "IncludeAmmonia": "true",
        "DependentLosses": "true",
        "tolerance_value": tolerance_value,
        "tolerance_unit": tolerance_unit,
        "name": name,
    }


# Default fragment-spectrum settings, keyed by name. The values double as
# command-line defaults (add_fragment_options) and are overwritten in place
# by update_fragment_settings().
fragment_settings = {
    "FTMS": _fragment_profile("FTMS", "true", "true", "10", "20", "Ppm"),
    "ITMS": _fragment_profile("ITMS", "false", "false", "6", "0.5", "Dalton"),
    "TOF": _fragment_profile("TOF", "false", "true", "10", "0.1", "Dalton"),
    "Unknown": _fragment_profile("Unknown", "false", "false", "6", "0.5", "Dalton"),
}
330 | |
331 | |
def build_isobaric_labels(reporter_type):
    """Render the ``<isobaricLabels>`` element for a reporter-ion chemistry.

    Args:
        reporter_type: one of "itraq_4plex", "itraq_8plex", "tmt_2plex",
            "tmt_6plex", or a false value for no isobaric labelling.

    Returns:
        ``"<isobaricLabels />"`` when no reporter type is given, otherwise an
        element listing one label per terminus ("Nter", "Lys") and mass.

    Raises:
        Exception: for an unrecognised reporter type.
    """
    if not reporter_type:
        return "<isobaricLabels />"

    # reporter type -> (label prefix, reporter masses)
    chemistries = {
        "itraq_4plex": ("iTRAQ4plex", [114, 115, 116, 117]),
        "itraq_8plex": ("iTRAQ8plex", [113, 114, 115, 116, 117, 118, 119, 121]),
        "tmt_2plex": ("TMT2plex", [126, 127]),
        "tmt_6plex": ("TMT6plex", [126, 127, 128, 129, 130, 131]),
    }
    if reporter_type not in chemistries:
        raise Exception("Unknown reporter type - %s" % reporter_type)
    prefix, mzs = chemistries[reporter_type]

    label_elements = []
    for term in ("Nter", "Lys"):
        for mz in mzs:
            label_elements.append(xml_string("%s-%s%d" % (prefix, term, mz)))
    return wrap(label_elements, "isobaricLabels")
351 | |
352 | |
def parse_groups(inputs_file, group_parts=("num",), input_parts=("name", "path")):
    """Parse the input-groups description file.

    Blank lines and lines starting with ``#`` are ignored. A line starting
    with ``<group_parts[0]>:`` opens a group record (one line per entry of
    ``group_parts``); a line starting with ``<input_parts[0]>:`` opens an
    input record (one line per entry of ``input_parts``) that belongs to the
    most recently opened group. Any other line is skipped.

    Args:
        inputs_file: path of the description file.
        group_parts: field names making up a group record.
        input_parts: field names making up an input record.

    Returns:
        dict mapping the group's first field value to
        ``{"group_data": {field: value}, "inputs": [[value, ...], ...]}``.
        Groups without any input do not appear.

    Raises:
        ValueError: if a record is cut short by the end of the file, or an
            input record appears before any group record.
    """
    with open(inputs_file, "r") as handle:
        stripped_lines = [line.strip() for line in handle]
    inputs_lines = [line for line in stripped_lines if line and not line.startswith("#")]

    group_prefixes = ["%s:" % group_part for group_part in group_parts]
    input_prefixes = ["%s:" % input_part for input_part in input_parts]

    def take_record(start, prefixes):
        # Slice out the lines of one record, one line per expected prefix.
        record = inputs_lines[start:start + len(prefixes)]
        if len(record) < len(prefixes):
            raise ValueError("Incomplete record starting with %r in %s" % (inputs_lines[start], inputs_file))
        # Only the first line's prefix has been checked by the caller; the
        # remaining lines are sliced positionally.
        return [line[len(prefix):] for prefix, line in zip(prefixes, record)]

    groups = {}
    cur_group = None
    group_data = None
    i = 0
    while i < len(inputs_lines):
        line = inputs_lines[i]
        if line.startswith(group_prefixes[0]):
            # Start new group
            values = take_record(i, group_prefixes)
            cur_group = values[0]
            group_data = dict(zip(group_parts, values))
            i += len(group_prefixes)
        elif line.startswith(input_prefixes[0]):
            if group_data is None:
                raise ValueError("Input record %r appears before any group in %s" % (line, inputs_file))
            entry = take_record(i, input_prefixes)
            if cur_group not in groups:
                groups[cur_group] = {"group_data": group_data, "inputs": []}
            groups[cur_group]["inputs"].append(entry)
            i += len(input_prefixes)
        else:
            # Skip unrecognised line
            i += 1
    return groups
385 | |
386 | |
def add_fragment_options(parser):
    """Register one command-line option per fragment setting.

    For every entry of the module-level ``fragment_settings`` an option named
    ``--<settings name>_<key>`` (lower-cased) is added to ``parser``, with
    the current value as its default.
    """
    # .items() instead of .iteritems(): works on Python 2 and Python 3.
    for name, options in fragment_settings.items():
        for key, value in options.items():
            option_key = ("%s_%s" % (name, key)).lower()
            parser.add_option("--%s" % option_key, default=value)
392 | |
393 | |
def update_fragment_settings(arg_options):
    """Overwrite the module-level ``fragment_settings`` with parsed options.

    For every existing setting the attribute ``<settings name>_<key>``
    (lower-cased) is read from ``arg_options`` and stored in place.

    Raises:
        AttributeError: if ``arg_options`` lacks one of those attributes.
    """
    for name, options in fragment_settings.items():
        # Snapshot the keys; only values of existing keys are replaced.
        for key in list(options):
            arg_option_key = ("%s_%s" % (name, key)).lower()
            options[key] = getattr(arg_options, arg_option_key)
399 | |
400 | |
def to_fragment_settings(name, values):
    """Render one ``<FragmentSpectrumSettings>`` XML element.

    Args:
        name: settings name; used for the ``Name`` attribute only when
            ``values`` has no "name" entry of its own.
        values: mapping with the keys InPpm, Deisotope, Topx, HigherCharges,
            IncludeWater, IncludeAmmonia, DependentLosses, tolerance_value
            and tolerance_unit (string values). It is not modified.

    Returns:
        The XML fragment as a string.

    Raises:
        KeyError: if a required key is missing from ``values``.
    """

    fragment_settings_template = """
    <FragmentSpectrumSettings Name="$name" InPpm="$InPpm" Deisotope="$Deisotope"
     Topx="$Topx" HigherCharges="$HigherCharges" IncludeWater="$IncludeWater" IncludeAmmonia="$IncludeAmmonia"
     DependentLosses="$DependentLosses">
      <Tolerance>
        <Value>$tolerance_value</Value>
        <Unit>$tolerance_unit</Unit>
      </Tolerance>
    </FragmentSpectrumSettings>
    """
    safe_values = dict(values)
    safe_values.setdefault("name", name)
    for key in list(safe_values):
        # Most values end up inside double-quoted attributes, so the double
        # quote must be escaped in addition to &, < and >.
        safe_values[key] = escape(safe_values[key], {'"': "&quot;"})
    return Template(fragment_settings_template).substitute(safe_values)
419 | |
420 | |
def get_file_paths(files):
    """Return a ``<filePaths>`` element with one ``<string>`` child per path."""
    path_elements = list(map(xml_string, files))
    return wrap(path_elements, "filePaths")
423 | |
424 | |
def get_file_names(file_names):
    """Return a ``<fileNames>`` element with one ``<string>`` child per name."""
    name_elements = list(map(xml_string, file_names))
    return wrap(name_elements, "fileNames")
427 | |
428 | |
def get_file_groups(file_groups):
    """Return a ``<paramGroups>`` element with one ``<int>`` child per group."""
    group_elements = list(map(xml_int, file_groups))
    return wrap(group_elements, "paramGroups")
431 | |
432 | |
def wrap(values, tag):
    """Concatenate the strings in ``values`` and enclose them in ``<tag>``."""
    body = "".join(values)
    return "<{0}>{1}</{0}>".format(tag, body)
435 | |
436 | |
def xml_string(str):
    """Return a ``<string>`` element holding the XML-escaped text.

    A false value (empty string, None) yields the empty element
    ``<string />``.
    """
    if not str:
        return "<string />"
    escaped_text = escape(str)
    return "<string>%s</string>" % escaped_text
442 | |
443 | |
def xml_int(value):
    """Return an ``<int>`` element holding ``value`` converted with int()."""
    number = int(value)
    return "<int>%d</int>" % number
446 | |
447 | |
def get_properties(options):
    """Build the substitution dictionary for the parameter-file templates.

    Besides computing values, this calls setup_database(options), which
    registers the FASTA database as a side effect.

    Args:
        options: parsed command-line options; ``restrict_mods``,
            ``fixed_mods`` and ``variable_mods`` must already be lists.

    Returns:
        dict mapping template placeholder names to strings.
    """
    # Options that are copied verbatim (as str) under the same name.
    direct_properties = ["lcms_run_type",
                         "max_missed_cleavages",
                         "protease",
                         "first_search_tol",
                         "main_search_tol",
                         "max_n_mods",
                         "max_charge",
                         "max_labeled_aa",
                         "do_mass_filtering",
                         "calc_peak_properties",
                         "use_original_precursor_mz",
                         "multi_modification_search",
                         "keep_low_scores_mode",
                         "msms_centroid_mode",
                         "quant_mode",
                         "site_quant_mode",
                         "advanced_ratios",
                         "rt_shift",
                         "fast_lfq",
                         "randomize",
                         "aif_sil_weight",
                         "aif_iso_weight",
                         "aif_topx",
                         "aif_correlation",
                         "aif_correlation_first_pass",
                         "aif_min_mass",
                         "aif_msms_tol",
                         "aif_second_pass",
                         "aif_iterative",
                         "aif_threhold_fdr",
                         "restrict_protein_quantification",
                         "matching_time_window",
                         "number_of_candidates_multiplexed_msms",
                         "number_of_candidates_msms",
                         "separate_aas_for_site_fdr",
                         "special_aas",
                         "include_contamiants",
                         "equal_il",
                         "topx_window",
                         "max_peptide_mass",
                         "reporter_pif",
                         "reporter_fraction",
                         "reporter_base_peak_ratio",
                         "score_threshold",
                         "filter_aacounts",
                         "second_peptide",
                         "match_between_runs",
                         "match_between_runs_fdr",
                         "re_quantify",
                         "dependent_peptides",
                         "dependent_peptide_fdr",
                         "dependent_peptide_mass_bin",
                         "label_free",
                         "lfq_min_edges_per_node",
                         "lfq_av_edges_per_node",
                         "hybrid_quantification",
                         "msms_connection",
                         "ibaq",
                         "msms_recalibration",
                         "ibaq_log_fit",
                         "razor_protein_fdr",
                         "calc_sequence_tags",
                         "de_novo_var_mods",
                         "mass_difference_search",
                         "min_pep_len",
                         "peptide_fdr",
                         "peptide_pep",
                         "protein_fdr",
                         "site_fdr",
                         "min_peptide_length_for_unspecific_search",
                         "max_peptide_length_for_unspecific_search",
                         "use_norm_ratios_for_occupancy",
                         "min_peptides",
                         "min_razor_peptides",
                         "min_unique_peptides",
                         "use_counterparts",
                         "min_ratio_count",
                         "lfq_min_ratio_count",
                         ]

    props = {
        "slice_peaks": "true",
        "num_cores": str(options.num_cores),
        "database": xml_string(setup_database(options)),
        "process_folder": os.path.join(os.getcwd(), "process"),
    }
    for prop in direct_properties:
        props[prop] = str(getattr(options, prop))

    # One rendered XML fragment per settings entry, e.g.
    # "ftms_fragment_settings". .items() works on Python 2 and 3.
    for name, fragment_options in fragment_settings.items():
        key = "%s_fragment_settings" % name.lower()
        props[key] = to_fragment_settings(name, fragment_options)

    # Modification lists become elements with one <string> child each.
    mod_elements = (
        ("restrict_mods", options.restrict_mods, "restrictMods"),
        ("fixed_mods", options.fixed_mods, "fixedModifications"),
        ("variable_mods", options.variable_mods, "variableModifications"),
    )
    for prop_name, mods, tag in mod_elements:
        props[prop_name] = wrap([xml_string(mod) for mod in mods], tag)
    return props
549 | |
550 | |
551 # http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python | |
def which(program):
    """Locate an executable, similar to the shell's ``which``.

    Args:
        program: a bare command name, or a path containing a directory part.

    Returns:
        ``program`` itself when it contains a directory part and is an
        executable file; otherwise the first ``<dir>/<program>`` over the
        PATH entries that is an executable file; ``None`` when nothing
        matches or PATH is not set.
    """
    import os

    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    fpath, fname = os.path.split(program)
    if fpath:
        return program if is_exe(program) else None

    search_path = os.environ.get("PATH")
    if search_path is None:
        return None
    for path in search_path.split(os.pathsep):
        # PATH entries may be wrapped in double quotes (seen on Windows).
        exe_file = os.path.join(path.strip('"'), program)
        if is_exe(exe_file):
            return exe_file
    return None
570 | |
571 | |
def get_unique_path(base, extension):
    """Return ``<base>_<current time in milliseconds><extension>``."""
    millis = int(time.time() * 1000)
    return "{0}_{1:d}{2}".format(base, millis, extension)
576 | |
577 | |
def get_env_property(name, default):
    """Return environment variable ``name``, or ``default`` when it is unset."""
    return os.environ.get(name, default)
583 | |
584 | |
def setup_database(options):
    """Stage the FASTA database and register it in MaxQuant's databases.xml.

    The database is linked (or copied, see symlink()) into the current
    directory under a unique ``<name>_<timestamp>.fasta`` name, and a
    ``databases`` entry for that file name is appended to the configuration
    file. The configuration file is taken from the MAXQUANT_DATABASE_CONF
    environment variable or, failing that, from ``conf/databases.xml`` next
    to ``MaxQuantCmd.exe`` found on PATH. The update runs under a file lock.

    Args:
        options: parsed options providing ``database`` (path) and
            ``database_name`` (display name).

    Returns:
        Absolute path of the staged database file.

    Raises:
        ValueError: if the database name contains a directory component.
        Exception: if the configuration file cannot be located or has no
            ``Databases`` node.
    """
    database_path = options.database
    database_name = options.database_name.replace(" ", "_")
    database_basename, _extension = os.path.splitext(database_name)
    database_destination = get_unique_path(database_basename, ".fasta")
    # Explicit check instead of an assert, which is stripped under -O: the
    # staged file must land directly in the working directory.
    if database_destination != os.path.basename(database_destination):
        raise ValueError("Database name must not contain a directory component: %r" % options.database_name)
    symlink(database_path, database_destination)

    database_conf = get_env_property("MAXQUANT_DATABASE_CONF", None)
    if not database_conf:
        exe_path = which("MaxQuantCmd.exe")
        if exe_path is None:
            raise Exception("MaxQuantCmd.exe was not found on PATH and MAXQUANT_DATABASE_CONF is not set.")
        database_conf = os.path.join(os.path.dirname(exe_path), "conf", "databases.xml")
    # Serialize concurrent jobs rewriting the shared configuration file.
    with FileLock(database_conf + ".galaxy_lock"):
        tree = ET.parse(database_conf)
        root = tree.getroot()
        databases_node = root.find("Databases")
        if databases_node is None:
            raise Exception("No 'Databases' node found in %s" % database_conf)
        database_node = ET.SubElement(databases_node, 'databases')
        database_node.attrib["search_expression"] = ">([^ ]*)"
        database_node.attrib["replacement_expression"] = "%1"
        database_node.attrib["filename"] = database_destination
        tree.write(database_conf)
    return os.path.abspath(database_destination)
608 | |
609 | |
def setup_inputs(input_groups_path):
    """Stage the raw files and render the ``<rawFileInfo>`` element.

    Every input listed in the groups file is linked (or copied, see
    symlink()) into the current directory under its base name, with a
    ``.RAW`` extension appended when the name does not already end in
    ``.raw`` (case-insensitive).

    Args:
        input_groups_path: path of the groups description file understood
            by parse_groups().

    Returns:
        XML string with the absolute file paths, the file names without
        extension and the parameter-group number of every input.
    """
    parsed_groups = parse_groups(input_groups_path)
    paths = []
    names = []
    group_nums = []
    for group, group_info in parsed_groups.items():
        files = group_info["inputs"]
        group_num = group_info["group_data"]["num"]
        for (name, path) in files:
            name = os.path.basename(name)
            if not name.lower().endswith(".raw"):
                # Single dot only: "%s.%s" with ".RAW" produced "name..RAW",
                # which also left a trailing dot in the derived file name.
                name = "%s.RAW" % name
            symlink(path, name)
            paths.append(os.path.abspath(name))
            names.append(os.path.splitext(name)[0])
            group_nums.append(group_num)
    file_data = (get_file_paths(paths), get_file_names(names), get_file_groups(group_nums))
    return "<rawFileInfo>%s%s%s<Fractions/><Values/></rawFileInfo> " % file_data
628 | |
629 | |
def set_group_params(properties, options):
    """Render GROUP_TEMPLATE and store it in ``properties["group_params"]``.

    The group template sees a copy of ``properties`` extended with the
    labels element, the multiplicity (number of labels), a fixed group
    index of "1" and a fixed instrument of "0". Without any ``--labels``
    a single empty label is used. ``properties`` is modified in place;
    nothing is returned.
    """
    labels = options.labels if options.labels else [""]
    # Commas inside one label become "; " in the parameter file.
    label_elements = [xml_string(label.replace(",", "; ")) for label in labels]

    group_properties = dict(properties)
    group_properties.update({
        "labels": wrap(label_elements, "labels"),
        "multiplicity": len(labels),
        "group_index": "1",
        "ms_instrument": "0",
    })
    properties["group_params"] = Template(GROUP_TEMPLATE).substitute(group_properties)
642 | |
643 | |
def split_mods(mods_string):
    """Split a comma-separated modification list, dropping empty entries.

    A false value (empty string, None) yields an empty list.
    """
    if not mods_string:
        return []
    return list(filter(None, mods_string.split(",")))
646 | |
647 | |
def run_script():
    """Command-line driver: build mqpar.xml, run MaxQuant, collect outputs.

    Steps: parse the options, stage the raw files and the database, render
    the parameter file to ``mqpar.xml`` in the current directory (it is
    also echoed to stdout), run ``MaxQuantCmd.exe mqpar.xml <num_cores>``
    and copy every requested ``combined/txt/<name>.txt`` result, plus
    optionally the parameter file itself, to its ``--output_*`` destination.

    Exits through ``parser.error`` when --input_groups, --database or
    --database_name is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option("--input_groups")
    parser.add_option("--database")
    parser.add_option("--database_name")
    parser.add_option("--num_cores", type="int", default=4)
    parser.add_option("--max_missed_cleavages", type="int", default=2)
    parser.add_option("--protease", default="Trypsin/P")
    parser.add_option("--first_search_tol", default="20")
    parser.add_option("--main_search_tol", default="6")
    parser.add_option("--max_n_mods", type="int", default=5)
    parser.add_option("--max_charge", type="int", default=7)
    parser.add_option("--do_mass_filtering", default="true")
    parser.add_option("--labels", action="append", default=[])
    parser.add_option("--max_labeled_aa", type="int", default=3)
    parser.add_option("--keep_low_scores_mode", type="int", default=0)
    parser.add_option("--msms_centroid_mode", type="int", default=1)
    # 0 = all peptides, 1 = Use razor and unique peptides, 2 = use unique peptides
    parser.add_option("--quant_mode", type="int", default=1)
    parser.add_option("--site_quant_mode", type="int", default=0)
    parser.add_option("--aif_sil_weight", type="int", default=4)
    parser.add_option("--aif_iso_weight", type="int", default=2)
    parser.add_option("--aif_topx", type="int", default=50)
    parser.add_option("--aif_correlation", type="float", default=0.8)
    parser.add_option("--aif_correlation_first_pass", type="float", default=0.8)
    parser.add_option("--aif_min_mass", type="float", default=0)
    parser.add_option("--aif_msms_tol", type="float", default=10)
    parser.add_option("--aif_second_pass", default="false")
    parser.add_option("--aif_iterative", default="false")
    parser.add_option("--aif_threhold_fdr", default="0.01")
    parser.add_option("--restrict_protein_quantification", default="true")
    parser.add_option("--matching_time_window", default="2")
    parser.add_option("--number_of_candidates_multiplexed_msms", default="50")
    parser.add_option("--number_of_candidates_msms", default="15")
    parser.add_option("--separate_aas_for_site_fdr", default="true")
    parser.add_option("--advanced_ratios", default="false")
    parser.add_option("--rt_shift", default="false")
    parser.add_option("--fast_lfq", default="true")
    parser.add_option("--randomize", default="false")
    parser.add_option("--special_aas", default="KR")
    parser.add_option("--include_contamiants", default="false")
    parser.add_option("--equal_il", default="false")
    parser.add_option("--topx_window", default="100")
    parser.add_option("--max_peptide_mass", default="5000")
    parser.add_option("--reporter_pif", default="0.75")
    parser.add_option("--reporter_fraction", default="0")
    parser.add_option("--reporter_base_peak_ratio", default="0")
    parser.add_option("--score_threshold", default="0")
    parser.add_option("--filter_aacounts", default="true")
    parser.add_option("--second_peptide", default="true")
    parser.add_option("--match_between_runs", default="false")
    parser.add_option("--match_between_runs_fdr", default="false")
    parser.add_option("--re_quantify", default="true")
    parser.add_option("--dependent_peptides", default="false")
    parser.add_option("--dependent_peptide_fdr", default="0.01")
    parser.add_option("--dependent_peptide_mass_bin", default="0.0055")
    parser.add_option("--label_free", default="false")
    parser.add_option("--lfq_min_edges_per_node", default="3")
    parser.add_option("--lfq_av_edges_per_node", default="6")
    parser.add_option("--hybrid_quantification", default="false")
    parser.add_option("--msms_connection", default="false")
    parser.add_option("--ibaq", default="false")
    parser.add_option("--msms_recalibration", default="false")
    parser.add_option("--ibaq_log_fit", default="true")
    parser.add_option("--razor_protein_fdr", default="true")
    parser.add_option("--calc_sequence_tags", default="false")
    parser.add_option("--de_novo_var_mods", default="true")
    parser.add_option("--mass_difference_search", default="false")
    parser.add_option("--min_pep_len", default="7")
    parser.add_option("--peptide_fdr", default="0.01")
    parser.add_option("--peptide_pep", default="1")
    parser.add_option("--protein_fdr", default="0.01")
    parser.add_option("--site_fdr", default="0.01")
    parser.add_option("--min_peptide_length_for_unspecific_search", default="8")
    parser.add_option("--max_peptide_length_for_unspecific_search", default="25")
    parser.add_option("--use_norm_ratios_for_occupancy", default="true")
    parser.add_option("--min_peptides", default="1")
    parser.add_option("--min_razor_peptides", default="1")
    parser.add_option("--min_unique_peptides", default="0")
    parser.add_option("--use_counterparts", default="false")
    parser.add_option("--min_ratio_count", default="2")
    parser.add_option("--lfq_min_ratio_count", default="2")
    parser.add_option("--calc_peak_properties", default="false")
    parser.add_option("--use_original_precursor_mz", default="false")
    parser.add_option("--multi_modification_search", default="false")
    parser.add_option("--lcms_run_type", default="0")
    parser.add_option("--reporter_type", default=None)
    parser.add_option("--output_mqpar", default=None)
    # option suffix -> base name of the result file under combined/txt
    text_outputs = {
        "aif_msms": "aifMsms",
        "all_peptides": "allPeptides",
        "evidence": "evidence",
        "modification_specific_peptides": "modificationSpecificPeptides",
        "msms": "msms",
        "msms_scans": "msmsScans",
        "mz_range": "mzRange",
        "parameters": "parameters",
        "peptides": "peptides",
        "protein_groups": "proteinGroups",
        "sim_peptides": "simPeptides",
        "sim_scans": "simScans",
        "summary": "summary"
    }
    for output in text_outputs:
        parser.add_option("--output_%s" % output, default=None)

    parser.add_option("--variable_mods", default="Oxidation (M),Acetyl (Protein N-term)")
    parser.add_option("--restrict_mods", default="Oxidation (M),Acetyl (Protein N-term)")
    parser.add_option("--fixed_mods", default="Carbamidomethyl (C)")

    add_fragment_options(parser)

    (options, args) = parser.parse_args()
    # Fail early with a usage message instead of an obscure error later on.
    for required in ("input_groups", "database", "database_name"):
        if not getattr(options, required):
            parser.error("--%s is required" % required)
    options.restrict_mods = split_mods(options.restrict_mods)
    options.fixed_mods = split_mods(options.fixed_mods)
    options.variable_mods = split_mods(options.variable_mods)

    update_fragment_settings(options)

    raw_file_info = setup_inputs(options.input_groups)
    properties = get_properties(options)
    properties["raw_file_info"] = raw_file_info
    properties["isobaric_labels"] = build_isobaric_labels(options.reporter_type)
    set_group_params(properties, options)
    driver_contents = Template(TEMPLATE).substitute(properties)
    # Close (and thereby flush) the parameter file before MaxQuant reads it.
    with open("mqpar.xml", "w") as mqpar_file:
        mqpar_file.write(driver_contents)
    print(driver_contents)
    execute("MaxQuantCmd.exe mqpar.xml %d" % options.num_cores)
    for key, basename in text_outputs.items():
        attribute = "output_%s" % key
        destination = getattr(options, attribute, None)
        if destination:
            source = os.path.join("combined", "txt", "%s.txt" % basename)
            shutil.copy(source, destination)
    output_mqpar = options.output_mqpar
    if output_mqpar:
        shutil.copy("mqpar.xml", output_mqpar)
785 | |
# Run the wrapper only when this file is executed as a script.
if __name__ == "__main__":
    __main__()