comparison proteinpilot_wrapper.py @ 0:7dcb26ce559c

Uploaded
author galaxyp
date Wed, 19 Dec 2012 00:22:55 -0500
parents
children 790d80981060
comparison
equal deleted inserted replaced
-1:000000000000 0:7dcb26ce559c
1 #!/usr/bin/env python
2 import optparse
3 import os
4 import sys
5 import tempfile
6 import shutil
7 import subprocess
8 import re
9 import time
10 from os.path import basename
11 import logging
12
13 assert sys.version_info[:2] >= ( 2, 6 )
14
15 log = logging.getLogger(__name__)
16
17 DEBUG = True
18
19 working_directory = os.getcwd()
20 tmp_stderr_name = tempfile.NamedTemporaryFile(dir = working_directory, suffix = '.stderr').name
21 tmp_stdout_name = tempfile.NamedTemporaryFile(dir = working_directory, suffix = '.stdout').name
22
23 def stop_err( msg ):
24 sys.stderr.write( "%s\n" % msg )
25 sys.exit()
26
27
28 def read_stderr():
29 stderr = ''
30 if(os.path.exists(tmp_stderr_name)):
31 with open(tmp_stderr_name, 'rb') as tmp_stderr:
32 buffsize = 1048576
33 try:
34 while True:
35 stderr += tmp_stderr.read(buffsize)
36 if not stderr or len(stderr) % buffsize != 0:
37 break
38 except OverflowError:
39 pass
40 return stderr
41
42
43 def execute(command, stdin=None):
44 try:
45 with open(tmp_stderr_name, 'wb') as tmp_stderr:
46 with open(tmp_stdout_name, 'wb') as tmp_stdout:
47 proc = subprocess.Popen(args=command, shell=True, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno(), stdin=stdin, env=os.environ)
48 returncode = proc.wait()
49 if returncode != 0:
50 raise Exception("Program returned with non-zero exit code %d. stderr: %s" % (returncode, read_stderr()))
51 finally:
52 print open(tmp_stderr_name, "r").read(64000)
53 print open(tmp_stdout_name, "r").read(64000)
54
55
56 def delete_file(path):
57 if os.path.exists(path):
58 try:
59 os.remove(path)
60 except:
61 pass
62
63 def delete_directory(directory):
64 if os.path.exists(directory):
65 try:
66 shutil.rmtree(directory)
67 except:
68 pass
69
70 def symlink(source, link_name):
71 import platform
72 if platform.system() == 'Windows':
73 try:
74 import win32file
75 win32file.CreateSymbolicLink(source, link_name, 1)
76 except:
77 shutil.copy(source, link_name)
78 else:
79 os.symlink(source, link_name)
80
81
82 def copy_to_working_directory(data_file, relative_path):
83 if os.path.abspath(data_file) != os.path.abspath(relative_path):
84 shutil.copy(data_file, relative_path)
85 return relative_path
86
87 def __main__():
88 run_script()
89
90 #ENDTEMPLATE
91
92 from string import Template
93
94 METHOD_TEMPLATE = """<UISETTINGS>
95 <UI_SAMPLE_TYPE>$sample_type</UI_SAMPLE_TYPE>
96 <UI_QUANT_TYPE>$quant_type</UI_QUANT_TYPE>
97 <UI_BACKGROUND_CORRECTION>$background_correction</UI_BACKGROUND_CORRECTION>
98 <UI_BIAS_CORRECTION>$bias_correction</UI_BIAS_CORRECTION>
99 <UI_CYS_ALKYLATION>$cys_alkylation</UI_CYS_ALKYLATION>
100 <UI_DIGESTION>$digestion</UI_DIGESTION>
101 <UI_SPECIAL_FACTOR>$special_factors</UI_SPECIAL_FACTOR>
102 <UI_INSTRUMENT>$instrument</UI_INSTRUMENT>
103 <UI_SPECIES></UI_SPECIES>
104 <UI_USER_NAME></UI_USER_NAME>
105 <UI_MACHINE_NAME></UI_MACHINE_NAME>
106 <UI_START_TIME></UI_START_TIME>
107 <UI_SEARCH_ID></UI_SEARCH_ID>
108 <UI_ID_FOCUS>$search_foci</UI_ID_FOCUS>
109 <UI_SEARCH_EFFORT>$search_effort</UI_SEARCH_EFFORT>
110 <UI_SEARCH_RESOURCE>$database_name</UI_SEARCH_RESOURCE>
111 <UI_MIN_UNUSED_PROTSCORE>$min_unused_protscore</UI_MIN_UNUSED_PROTSCORE>
112 <UI_PSPEP>$pspep</UI_PSPEP>
113 <UI_MAX_QUANT_LABELS>$max_quant_labels</UI_MAX_QUANT_LABELS>
114 $quant_labels
115 </UISETTINGS>
116 """
117
118 quant_special_cases = {
119 "iTRAQ 4plex (Peptide Labeled)": "iTRAQ4PLEX",
120 "iTRAQ 4plex (Protein Labeled)": "iTRAQ4PLEX",
121 "iTRAQ 8plex (Peptide Labeled)": "iTRAQ8PLEX",
122 "iTRAQ 8plex (Protein Labeled)": "iTRAQ8PLEX",
123 "mTRAQ (Peptide Labeled - M00, M04)": "mTRAQ_0-4",
124 "mTRAQ (Peptide Labeled - M00, M08)": "mTRAQ_0-8",
125 "mTRAQ (Peptide Labeled - M04, M08)": "mTRAQ_4-8",
126 "mTRAQ (Peptide Labeled - M00, M04, M08)": "mTRAQ_0-4-8",
127 "Proteolytic O-18 labeling": "Proteolytic O-18 v O-16",
128 "Cleavable ICAT": "ICAT9",
129 "ICPL Light, Heavy (Peptide Labeled)": "ICPL peptide",
130 "ICPL Light, Heavy (Protein Labeled)": "ICPL protein",
131 }
132
133
134 def get_env_property(name, default):
135 if name in os.environ:
136 return os.environ[name]
137 else:
138 return default
139
140
141 def build_quant_label(reagent, quant_type="Not Used", treatment="", minus2="0", minus1="0", plus1="0", plus2="0"):
142 return {
143 "reagent": reagent,
144 "type": quant_type,
145 "treatment": treatment,
146 "minus2": minus2,
147 "minus1": minus1,
148 "plus1": plus1,
149 "plus2": plus2,
150 }
151
152
153 def build_quant_labels(options, quant_type):
154 if quant_type == "iTRAQ8PLEX":
155 return [
156 build_quant_label("iTRAQ113", plus1="6.89", plus2="0.24"),
157 build_quant_label("iTRAQ114", minus1="0.94", plus1="5.9", plus2="0.16"),
158 build_quant_label("iTRAQ115", minus1="1.88", plus1="4.9", plus2="0.1"),
159 build_quant_label("iTRAQ116", minus1="2.82", plus1="3.9", plus2="0.07"),
160 build_quant_label("iTRAQ117", minus2="0.06", minus1="3.77", plus1="2.88"),
161 build_quant_label("iTRAQ118", minus2="0.09", minus1="4.71", plus1="1.91"),
162 build_quant_label("iTRAQ119", minus2="0.14", minus1="5.66", plus1="0.87"),
163 build_quant_label("iTRAQ121", minus2="0.27", minus1="7.44", plus1="0.18"),
164 ]
165 elif quant_type == "iTRAQ4PLEX":
166 return [
167 build_quant_label("iTRAQ114", minus1="1.00", plus1="5.9", plus2="0.20"),
168 build_quant_label("iTRAQ115", minus1="2.00", plus1="5.6", plus2="0.1"),
169 build_quant_label("iTRAQ116", minus1="3.00", plus1="4.5", plus2="0.1"),
170 build_quant_label("iTRAQ117", minus2="0.10", minus1="4.00", plus1="3.50", plus2="0.1"),
171 ]
172 else:
173 return []
174
175
176 def join_quant_labels(labels):
177 template = '<QUANT_LABEL_SETTING reagent="$reagent" type="$type" treatment="$treatment" minus2="$minus2" minus1="$minus1" plus1="$plus1" plus2="$plus2"/>'
178 return "\n".join([Template(template).substitute(quant_label) for quant_label in labels])
179
180
181 def handle_sample_type(options, parameter_dict):
182 sample_type = options.sample_type
183 if sample_type in quant_special_cases:
184 quant_type = quant_special_cases[sample_type]
185 else:
186 quant_type = sample_type
187 if options.quantitative.upper() != "TRUE":
188 quant_type = ""
189 parameter_dict["sample_type"] = sample_type
190 parameter_dict["quant_type"] = quant_type
191 parameter_dict["quant_labels"] = join_quant_labels(build_quant_labels(options, quant_type))
192
193
194 def setup_database(options):
195 PROTEINPILOT_DATABASE_DIR = get_env_property("PROTEIN_PILOT_DATABASE_FOLDER", "C:\\AB SCIEX\\ProteinPilot Data\\SearchDatabases")
196 database_path = options.database
197 database_name = options.database_name
198 database_name = database_name.replace(" ", "_")
199 (database_basename, extension) = os.path.splitext(database_name)
200 base = os.path.join(PROTEINPILOT_DATABASE_DIR, "gx_%s" % database_basename)
201 database_destination = get_unique_path(base, ".fasta")
202 symlink(database_path, database_destination)
203 return (database_destination, os.path.basename(os.path.splitext(database_destination)[0]))
204
205
206 def extract_list(parameter):
207 if parameter == None or parameter == "None":
208 parameter = ""
209 return parameter.replace(",", ";")
210
211
212 def setup_methods(options):
213 ## Setup methods file
214 (database_path, database_name) = setup_database(options)
215 special_factors = extract_list(options.special_factors)
216 search_foci = extract_list(options.search_foci)
217 method_parameters = {
218 "background_correction": options.background_correction,
219 "bias_correction": options.bias_correction,
220 "cys_alkylation": options.cys_alkylation,
221 "digestion": options.digestion,
222 "instrument": options.instrument,
223 "search_effort": options.search_effort,
224 "search_foci": search_foci,
225 "pspep": options.pspep,
226 "min_unused_protscore": options.min_unused_protscore,
227 "max_quant_labels": "3",
228 "database_name": database_name,
229 "quantitative": options.quantitative,
230 "special_factors": special_factors
231 }
232 handle_sample_type(options, method_parameters)
233 method_contents = Template(METHOD_TEMPLATE).substitute(method_parameters)
234 PROTEINPILOT_METHODS_DIR = get_env_property("PROTEIN_PILOT_METHODS_FOLDER", "C:\\ProgramData\\AB SCIEX\\ProteinPilot\\ParagonMethods\\")
235 methods_name = "gx_%s" % os.path.split(os.getcwd())[-1]
236 methods_path = os.path.join(PROTEINPILOT_METHODS_DIR, "%s.xml" % methods_name)
237 open(methods_path, "w").write(method_contents)
238 return (methods_name, methods_path, database_path)
239
240
241 def setup_inputs(inputs, input_names):
242 links = []
243 for input, input_name in zip(inputs, input_names):
244 if DEBUG:
245 print "Processing input %s with name %s and size %d" % (input, input_name, os.stat(input).st_size)
246 if not input_name.upper().endswith(".MGF"):
247 input_name = "%s.mgf" % input_name
248 link_path = os.path.abspath(input_name)
249 symlink(input, link_path)
250 links.append(link_path)
251 return ",".join(["<DATA type=\"MGF\" filename=\"%s\" />" % link for link in links])
252
253
254 def get_unique_path(base, extension):
255 """
256 """
257 return "%s_%d%s" % (base, int(time.time() * 1000), extension)
258
259
260 def move_pspep_output(options, destination, suffix):
261 if destination:
262 source = "%s__FalsePositiveAnalysis__%s.csv" % (options.output, suffix)
263 shutil.move(source, destination)
264
265
266 def run_script():
267 parser = optparse.OptionParser()
268 parser.add_option("--input", dest="input", action="append", default=[])
269 parser.add_option("--input_name", dest="input_name", action="append", default=[])
270 parser.add_option("--database")
271 parser.add_option("--database_name")
272 parser.add_option("--instrument")
273 parser.add_option("--sample_type") # TODO: Restrict values
274 parser.add_option("--bias_correction", default="False")
275 parser.add_option("--background_correction", default="False")
276 parser.add_option("--cys_alkylation", default="None")
277 parser.add_option("--digestion", default="Trypsin")
278 parser.add_option("--special_factors", default="")
279 parser.add_option("--search_foci", default="")
280 parser.add_option("--search_effort", default="Rapid")
281 parser.add_option("--min_unused_protscore", default="3")
282 parser.add_option("--quantitative", default="False")
283 parser.add_option("--pspep", default="TRUE")
284 parser.add_option("--output")
285 parser.add_option("--output_methods")
286 #parser.add_option("--output_pspep_peptide", default="")
287 #parser.add_option("--output_pspep_protein", default="")
288 #parser.add_option("--output_pspep_spectra", default="")
289 parser.add_option("--output_pspep_report", default="")
290 (options, args) = parser.parse_args()
291
292 (methods_name, methods_path, database_path) = setup_methods(options)
293 try:
294 group_file = "%s.group" % options.output
295 input_contents_template = """<PROTEINPILOTPARAMETERS>
296 <METHOD name="$methods_name" />
297 $inputs
298 <RESULT filename="$output" />
299 </PROTEINPILOTPARAMETERS>"""
300 input_parameters = {
301 "inputs": setup_inputs(options.input, options.input_name),
302 "output": group_file,
303 "methods_name": methods_name
304 }
305
306 input_contents = Template(input_contents_template).substitute(input_parameters)
307 open("input.xml", "w").write(input_contents)
308
309 protein_pilot_path = get_env_property("PROTEIN_PILOT_PATH", "")
310 if protein_pilot_path and not protein_pilot_path.endswith("\\"):
311 protein_pilot_path = "%s" % protein_pilot_path
312 execute("%sProteinPilot.exe input.xml" % protein_pilot_path)
313 shutil.move(group_file, options.output)
314 #move_pspep_output(options, options.output_pspep_spectra, "SpectralLevelData")
315 #move_pspep_output(options, options.output_pspep_peptide, "DistinctPeptideLevelData")
316 #move_pspep_output(options, options.output_pspep_protein, "ProteinLevelData")
317 if options.output_pspep_report:
318 source = "%s__FDR.xlsx" % (options.output)
319 shutil.move(source, options.output_pspep_report)
320 shutil.move(methods_path, options.output_methods)
321 finally:
322 delete_file(database_path)
323 delete_file(methods_path)
324
325 if __name__ == '__main__':
326 __main__()