Mercurial > repos > galaxyp > proteinpilot
comparison proteinpilot_wrapper.py @ 0:7dcb26ce559c
Uploaded
author | galaxyp |
---|---|
date | Wed, 19 Dec 2012 00:22:55 -0500 |
parents | |
children | 790d80981060 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7dcb26ce559c |
---|---|
1 #!/usr/bin/env python | |
2 import optparse | |
3 import os | |
4 import sys | |
5 import tempfile | |
6 import shutil | |
7 import subprocess | |
8 import re | |
9 import time | |
10 from os.path import basename | |
11 import logging | |
12 | |
13 assert sys.version_info[:2] >= ( 2, 6 ) | |
14 | |
15 log = logging.getLogger(__name__) | |
16 | |
17 DEBUG = True | |
18 | |
19 working_directory = os.getcwd() | |
20 tmp_stderr_name = tempfile.NamedTemporaryFile(dir = working_directory, suffix = '.stderr').name | |
21 tmp_stdout_name = tempfile.NamedTemporaryFile(dir = working_directory, suffix = '.stdout').name | |
22 | |
23 def stop_err( msg ): | |
24 sys.stderr.write( "%s\n" % msg ) | |
25 sys.exit() | |
26 | |
27 | |
28 def read_stderr(): | |
29 stderr = '' | |
30 if(os.path.exists(tmp_stderr_name)): | |
31 with open(tmp_stderr_name, 'rb') as tmp_stderr: | |
32 buffsize = 1048576 | |
33 try: | |
34 while True: | |
35 stderr += tmp_stderr.read(buffsize) | |
36 if not stderr or len(stderr) % buffsize != 0: | |
37 break | |
38 except OverflowError: | |
39 pass | |
40 return stderr | |
41 | |
42 | |
43 def execute(command, stdin=None): | |
44 try: | |
45 with open(tmp_stderr_name, 'wb') as tmp_stderr: | |
46 with open(tmp_stdout_name, 'wb') as tmp_stdout: | |
47 proc = subprocess.Popen(args=command, shell=True, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno(), stdin=stdin, env=os.environ) | |
48 returncode = proc.wait() | |
49 if returncode != 0: | |
50 raise Exception("Program returned with non-zero exit code %d. stderr: %s" % (returncode, read_stderr())) | |
51 finally: | |
52 print open(tmp_stderr_name, "r").read(64000) | |
53 print open(tmp_stdout_name, "r").read(64000) | |
54 | |
55 | |
56 def delete_file(path): | |
57 if os.path.exists(path): | |
58 try: | |
59 os.remove(path) | |
60 except: | |
61 pass | |
62 | |
63 def delete_directory(directory): | |
64 if os.path.exists(directory): | |
65 try: | |
66 shutil.rmtree(directory) | |
67 except: | |
68 pass | |
69 | |
70 def symlink(source, link_name): | |
71 import platform | |
72 if platform.system() == 'Windows': | |
73 try: | |
74 import win32file | |
75 win32file.CreateSymbolicLink(source, link_name, 1) | |
76 except: | |
77 shutil.copy(source, link_name) | |
78 else: | |
79 os.symlink(source, link_name) | |
80 | |
81 | |
82 def copy_to_working_directory(data_file, relative_path): | |
83 if os.path.abspath(data_file) != os.path.abspath(relative_path): | |
84 shutil.copy(data_file, relative_path) | |
85 return relative_path | |
86 | |
87 def __main__(): | |
88 run_script() | |
89 | |
90 #ENDTEMPLATE | |
91 | |
92 from string import Template | |
93 | |
94 METHOD_TEMPLATE = """<UISETTINGS> | |
95 <UI_SAMPLE_TYPE>$sample_type</UI_SAMPLE_TYPE> | |
96 <UI_QUANT_TYPE>$quant_type</UI_QUANT_TYPE> | |
97 <UI_BACKGROUND_CORRECTION>$background_correction</UI_BACKGROUND_CORRECTION> | |
98 <UI_BIAS_CORRECTION>$bias_correction</UI_BIAS_CORRECTION> | |
99 <UI_CYS_ALKYLATION>$cys_alkylation</UI_CYS_ALKYLATION> | |
100 <UI_DIGESTION>$digestion</UI_DIGESTION> | |
101 <UI_SPECIAL_FACTOR>$special_factors</UI_SPECIAL_FACTOR> | |
102 <UI_INSTRUMENT>$instrument</UI_INSTRUMENT> | |
103 <UI_SPECIES></UI_SPECIES> | |
104 <UI_USER_NAME></UI_USER_NAME> | |
105 <UI_MACHINE_NAME></UI_MACHINE_NAME> | |
106 <UI_START_TIME></UI_START_TIME> | |
107 <UI_SEARCH_ID></UI_SEARCH_ID> | |
108 <UI_ID_FOCUS>$search_foci</UI_ID_FOCUS> | |
109 <UI_SEARCH_EFFORT>$search_effort</UI_SEARCH_EFFORT> | |
110 <UI_SEARCH_RESOURCE>$database_name</UI_SEARCH_RESOURCE> | |
111 <UI_MIN_UNUSED_PROTSCORE>$min_unused_protscore</UI_MIN_UNUSED_PROTSCORE> | |
112 <UI_PSPEP>$pspep</UI_PSPEP> | |
113 <UI_MAX_QUANT_LABELS>$max_quant_labels</UI_MAX_QUANT_LABELS> | |
114 $quant_labels | |
115 </UISETTINGS> | |
116 """ | |
117 | |
118 quant_special_cases = { | |
119 "iTRAQ 4plex (Peptide Labeled)": "iTRAQ4PLEX", | |
120 "iTRAQ 4plex (Protein Labeled)": "iTRAQ4PLEX", | |
121 "iTRAQ 8plex (Peptide Labeled)": "iTRAQ8PLEX", | |
122 "iTRAQ 8plex (Protein Labeled)": "iTRAQ8PLEX", | |
123 "mTRAQ (Peptide Labeled - M00, M04)": "mTRAQ_0-4", | |
124 "mTRAQ (Peptide Labeled - M00, M08)": "mTRAQ_0-8", | |
125 "mTRAQ (Peptide Labeled - M04, M08)": "mTRAQ_4-8", | |
126 "mTRAQ (Peptide Labeled - M00, M04, M08)": "mTRAQ_0-4-8", | |
127 "Proteolytic O-18 labeling": "Proteolytic O-18 v O-16", | |
128 "Cleavable ICAT": "ICAT9", | |
129 "ICPL Light, Heavy (Peptide Labeled)": "ICPL peptide", | |
130 "ICPL Light, Heavy (Protein Labeled)": "ICPL protein", | |
131 } | |
132 | |
133 | |
134 def get_env_property(name, default): | |
135 if name in os.environ: | |
136 return os.environ[name] | |
137 else: | |
138 return default | |
139 | |
140 | |
141 def build_quant_label(reagent, quant_type="Not Used", treatment="", minus2="0", minus1="0", plus1="0", plus2="0"): | |
142 return { | |
143 "reagent": reagent, | |
144 "type": quant_type, | |
145 "treatment": treatment, | |
146 "minus2": minus2, | |
147 "minus1": minus1, | |
148 "plus1": plus1, | |
149 "plus2": plus2, | |
150 } | |
151 | |
152 | |
153 def build_quant_labels(options, quant_type): | |
154 if quant_type == "iTRAQ8PLEX": | |
155 return [ | |
156 build_quant_label("iTRAQ113", plus1="6.89", plus2="0.24"), | |
157 build_quant_label("iTRAQ114", minus1="0.94", plus1="5.9", plus2="0.16"), | |
158 build_quant_label("iTRAQ115", minus1="1.88", plus1="4.9", plus2="0.1"), | |
159 build_quant_label("iTRAQ116", minus1="2.82", plus1="3.9", plus2="0.07"), | |
160 build_quant_label("iTRAQ117", minus2="0.06", minus1="3.77", plus1="2.88"), | |
161 build_quant_label("iTRAQ118", minus2="0.09", minus1="4.71", plus1="1.91"), | |
162 build_quant_label("iTRAQ119", minus2="0.14", minus1="5.66", plus1="0.87"), | |
163 build_quant_label("iTRAQ121", minus2="0.27", minus1="7.44", plus1="0.18"), | |
164 ] | |
165 elif quant_type == "iTRAQ4PLEX": | |
166 return [ | |
167 build_quant_label("iTRAQ114", minus1="1.00", plus1="5.9", plus2="0.20"), | |
168 build_quant_label("iTRAQ115", minus1="2.00", plus1="5.6", plus2="0.1"), | |
169 build_quant_label("iTRAQ116", minus1="3.00", plus1="4.5", plus2="0.1"), | |
170 build_quant_label("iTRAQ117", minus2="0.10", minus1="4.00", plus1="3.50", plus2="0.1"), | |
171 ] | |
172 else: | |
173 return [] | |
174 | |
175 | |
176 def join_quant_labels(labels): | |
177 template = '<QUANT_LABEL_SETTING reagent="$reagent" type="$type" treatment="$treatment" minus2="$minus2" minus1="$minus1" plus1="$plus1" plus2="$plus2"/>' | |
178 return "\n".join([Template(template).substitute(quant_label) for quant_label in labels]) | |
179 | |
180 | |
181 def handle_sample_type(options, parameter_dict): | |
182 sample_type = options.sample_type | |
183 if sample_type in quant_special_cases: | |
184 quant_type = quant_special_cases[sample_type] | |
185 else: | |
186 quant_type = sample_type | |
187 if options.quantitative.upper() != "TRUE": | |
188 quant_type = "" | |
189 parameter_dict["sample_type"] = sample_type | |
190 parameter_dict["quant_type"] = quant_type | |
191 parameter_dict["quant_labels"] = join_quant_labels(build_quant_labels(options, quant_type)) | |
192 | |
193 | |
194 def setup_database(options): | |
195 PROTEINPILOT_DATABASE_DIR = get_env_property("PROTEIN_PILOT_DATABASE_FOLDER", "C:\\AB SCIEX\\ProteinPilot Data\\SearchDatabases") | |
196 database_path = options.database | |
197 database_name = options.database_name | |
198 database_name = database_name.replace(" ", "_") | |
199 (database_basename, extension) = os.path.splitext(database_name) | |
200 base = os.path.join(PROTEINPILOT_DATABASE_DIR, "gx_%s" % database_basename) | |
201 database_destination = get_unique_path(base, ".fasta") | |
202 symlink(database_path, database_destination) | |
203 return (database_destination, os.path.basename(os.path.splitext(database_destination)[0])) | |
204 | |
205 | |
206 def extract_list(parameter): | |
207 if parameter == None or parameter == "None": | |
208 parameter = "" | |
209 return parameter.replace(",", ";") | |
210 | |
211 | |
212 def setup_methods(options): | |
213 ## Setup methods file | |
214 (database_path, database_name) = setup_database(options) | |
215 special_factors = extract_list(options.special_factors) | |
216 search_foci = extract_list(options.search_foci) | |
217 method_parameters = { | |
218 "background_correction": options.background_correction, | |
219 "bias_correction": options.bias_correction, | |
220 "cys_alkylation": options.cys_alkylation, | |
221 "digestion": options.digestion, | |
222 "instrument": options.instrument, | |
223 "search_effort": options.search_effort, | |
224 "search_foci": search_foci, | |
225 "pspep": options.pspep, | |
226 "min_unused_protscore": options.min_unused_protscore, | |
227 "max_quant_labels": "3", | |
228 "database_name": database_name, | |
229 "quantitative": options.quantitative, | |
230 "special_factors": special_factors | |
231 } | |
232 handle_sample_type(options, method_parameters) | |
233 method_contents = Template(METHOD_TEMPLATE).substitute(method_parameters) | |
234 PROTEINPILOT_METHODS_DIR = get_env_property("PROTEIN_PILOT_METHODS_FOLDER", "C:\\ProgramData\\AB SCIEX\\ProteinPilot\\ParagonMethods\\") | |
235 methods_name = "gx_%s" % os.path.split(os.getcwd())[-1] | |
236 methods_path = os.path.join(PROTEINPILOT_METHODS_DIR, "%s.xml" % methods_name) | |
237 open(methods_path, "w").write(method_contents) | |
238 return (methods_name, methods_path, database_path) | |
239 | |
240 | |
241 def setup_inputs(inputs, input_names): | |
242 links = [] | |
243 for input, input_name in zip(inputs, input_names): | |
244 if DEBUG: | |
245 print "Processing input %s with name %s and size %d" % (input, input_name, os.stat(input).st_size) | |
246 if not input_name.upper().endswith(".MGF"): | |
247 input_name = "%s.mgf" % input_name | |
248 link_path = os.path.abspath(input_name) | |
249 symlink(input, link_path) | |
250 links.append(link_path) | |
251 return ",".join(["<DATA type=\"MGF\" filename=\"%s\" />" % link for link in links]) | |
252 | |
253 | |
254 def get_unique_path(base, extension): | |
255 """ | |
256 """ | |
257 return "%s_%d%s" % (base, int(time.time() * 1000), extension) | |
258 | |
259 | |
260 def move_pspep_output(options, destination, suffix): | |
261 if destination: | |
262 source = "%s__FalsePositiveAnalysis__%s.csv" % (options.output, suffix) | |
263 shutil.move(source, destination) | |
264 | |
265 | |
266 def run_script(): | |
267 parser = optparse.OptionParser() | |
268 parser.add_option("--input", dest="input", action="append", default=[]) | |
269 parser.add_option("--input_name", dest="input_name", action="append", default=[]) | |
270 parser.add_option("--database") | |
271 parser.add_option("--database_name") | |
272 parser.add_option("--instrument") | |
273 parser.add_option("--sample_type") # TODO: Restrict values | |
274 parser.add_option("--bias_correction", default="False") | |
275 parser.add_option("--background_correction", default="False") | |
276 parser.add_option("--cys_alkylation", default="None") | |
277 parser.add_option("--digestion", default="Trypsin") | |
278 parser.add_option("--special_factors", default="") | |
279 parser.add_option("--search_foci", default="") | |
280 parser.add_option("--search_effort", default="Rapid") | |
281 parser.add_option("--min_unused_protscore", default="3") | |
282 parser.add_option("--quantitative", default="False") | |
283 parser.add_option("--pspep", default="TRUE") | |
284 parser.add_option("--output") | |
285 parser.add_option("--output_methods") | |
286 #parser.add_option("--output_pspep_peptide", default="") | |
287 #parser.add_option("--output_pspep_protein", default="") | |
288 #parser.add_option("--output_pspep_spectra", default="") | |
289 parser.add_option("--output_pspep_report", default="") | |
290 (options, args) = parser.parse_args() | |
291 | |
292 (methods_name, methods_path, database_path) = setup_methods(options) | |
293 try: | |
294 group_file = "%s.group" % options.output | |
295 input_contents_template = """<PROTEINPILOTPARAMETERS> | |
296 <METHOD name="$methods_name" /> | |
297 $inputs | |
298 <RESULT filename="$output" /> | |
299 </PROTEINPILOTPARAMETERS>""" | |
300 input_parameters = { | |
301 "inputs": setup_inputs(options.input, options.input_name), | |
302 "output": group_file, | |
303 "methods_name": methods_name | |
304 } | |
305 | |
306 input_contents = Template(input_contents_template).substitute(input_parameters) | |
307 open("input.xml", "w").write(input_contents) | |
308 | |
309 protein_pilot_path = get_env_property("PROTEIN_PILOT_PATH", "") | |
310 if protein_pilot_path and not protein_pilot_path.endswith("\\"): | |
311 protein_pilot_path = "%s" % protein_pilot_path | |
312 execute("%sProteinPilot.exe input.xml" % protein_pilot_path) | |
313 shutil.move(group_file, options.output) | |
314 #move_pspep_output(options, options.output_pspep_spectra, "SpectralLevelData") | |
315 #move_pspep_output(options, options.output_pspep_peptide, "DistinctPeptideLevelData") | |
316 #move_pspep_output(options, options.output_pspep_protein, "ProteinLevelData") | |
317 if options.output_pspep_report: | |
318 source = "%s__FDR.xlsx" % (options.output) | |
319 shutil.move(source, options.output_pspep_report) | |
320 shutil.move(methods_path, options.output_methods) | |
321 finally: | |
322 delete_file(database_path) | |
323 delete_file(methods_path) | |
324 | |
325 if __name__ == '__main__': | |
326 __main__() |