0
|
1 #!/usr/bin/env python
|
|
2 import optparse
|
|
3 import os
|
|
4 import sys
|
|
5 import tempfile
|
|
6 import subprocess
|
|
7 import time
|
1
|
8 import shutil
|
0
|
9 import logging
|
1
|
10 from xml.sax.saxutils import escape
|
0
|
11
|
|
log = logging.getLogger(__name__)

# When true, setup_inputs() prints a line describing every input it links.
DEBUG = True

working_directory = os.getcwd()


def _reserve_temp_name(suffix):
    """Return a fresh file name inside ``working_directory``.

    The temporary file is closed (and therefore deleted) before returning, so
    only its name is kept; execute() later re-creates the file at that path.
    Closing explicitly avoids relying on garbage collection to release the
    handle.
    """
    with tempfile.NamedTemporaryFile(dir=working_directory, suffix=suffix) as handle:
        return handle.name


# Paths used by execute() to capture the child process output.
tmp_stderr_name = _reserve_temp_name('.stderr')
tmp_stdout_name = _reserve_temp_name('.stdout')
|
0
|
19
|
1
|
20
|
|
def stop_err(msg):
    """Write ``msg`` to standard error and terminate the process.

    Exits with status 1 so that callers inspecting the exit code see the
    failure (a bare ``sys.exit()`` would report success).

    Raises:
        SystemExit: always.
    """
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)
|
|
24
|
|
25
|
|
def read_stderr():
    """Return the contents of the captured stderr file as a string.

    Returns an empty string when ``tmp_stderr_name`` does not exist. The file
    is read in 1 MiB chunks until a read returns no data; this terminates even
    when the file size is an exact multiple of the chunk size.
    """
    if not os.path.exists(tmp_stderr_name):
        return ''
    buffsize = 1048576
    chunks = []
    with open(tmp_stderr_name, 'rb') as tmp_stderr:
        try:
            while True:
                chunk = tmp_stderr.read(buffsize)
                if not chunk:
                    # End of file reached.
                    break
                chunks.append(chunk)
        except OverflowError:
            # Keep whatever was read so far, as the original code did.
            pass
    data = b''.join(chunks)
    if not isinstance(data, str):
        # Python 3: the file was read as bytes; hand back text.
        data = data.decode('utf-8', 'replace')
    return data
|
|
39
|
|
40
|
|
def execute(command, stdin=None):
    """Run ``command`` through the shell, capturing its output to temp files.

    stdout and stderr of the child are redirected to ``tmp_stdout_name`` and
    ``tmp_stderr_name``. Whether or not the command succeeds, up to 64000
    characters of each captured file are printed afterwards.

    Args:
        command: command line passed to ``subprocess.Popen`` with ``shell=True``.
        stdin: optional stdin for the child process.

    Raises:
        Exception: if the command exits with a non-zero status; the message
            includes the exit code and the captured stderr.
    """
    try:
        with open(tmp_stderr_name, 'wb') as tmp_stderr:
            with open(tmp_stdout_name, 'wb') as tmp_stdout:
                proc = subprocess.Popen(args=command, shell=True, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno(), stdin=stdin, env=os.environ)
                returncode = proc.wait()
        # The capture files are closed before stderr is read back.
        if returncode != 0:
            raise Exception("Program returned with non-zero exit code %d. stderr: %s" % (returncode, read_stderr()))
    finally:
        # Echo the captured output; close each handle instead of leaking it.
        for captured_name in (tmp_stderr_name, tmp_stdout_name):
            with open(captured_name, "r") as captured:
                print(captured.read(64000))
|
|
52
|
|
53
|
|
def delete_file(path):
    """Remove the file at ``path`` if it exists (best effort).

    Filesystem errors raised by the removal are ignored; other exceptions
    such as ``KeyboardInterrupt`` are no longer swallowed.
    """
    if os.path.exists(path):
        try:
            os.remove(path)
        except OSError:
            # Cleanup is deliberately best-effort.
            pass
|
|
60
|
|
def delete_directory(directory):
    """Recursively remove ``directory`` if it exists (best effort).

    Filesystem errors raised by the removal are ignored; other exceptions
    such as ``KeyboardInterrupt`` are no longer swallowed.
    """
    if os.path.exists(directory):
        try:
            shutil.rmtree(directory)
        except (OSError, shutil.Error):
            # Cleanup is deliberately best-effort.
            pass
|
|
67
|
|
def symlink(source, link_name):
    """Make ``link_name`` refer to the file ``source``.

    On Windows a symbolic link is attempted through pywin32; if pywin32 is
    missing or the link cannot be created, the file is copied instead. On
    other platforms ``os.symlink`` is used.

    Args:
        source: path of the existing file.
        link_name: path of the link (or copy) to create.
    """
    import platform
    if platform.system() == 'Windows':
        try:
            import win32file
            # pywin32 takes the link path first and the target second; flag 0
            # requests a file (not directory) link. The original call had the
            # two paths swapped.
            # NOTE(review): assumes sources are always regular files - confirm.
            win32file.CreateSymbolicLink(link_name, source, 0)
        except Exception:
            # ImportError, missing privilege, etc.: fall back to a copy.
            shutil.copy(source, link_name)
    else:
        os.symlink(source, link_name)
|
|
78
|
|
79
|
|
def copy_to_working_directory(data_file, relative_path):
    """Copy ``data_file`` to ``relative_path`` unless both are the same path.

    The two arguments are compared after ``os.path.abspath``; the copy is
    skipped when they match. ``relative_path`` is returned in either case.
    """
    already_in_place = os.path.abspath(data_file) == os.path.abspath(relative_path)
    if not already_in_place:
        shutil.copy(data_file, relative_path)
    return relative_path
|
|
84
|
|
def __main__():
    """Script entry point; delegates to run_script()."""
    run_script()
|
|
87
|
|
88 #ENDTEMPLATE
|
|
89
|
|
90 from string import Template
|
|
91
|
|
# string.Template source for the methods XML file written by setup_methods().
# Each $placeholder is filled from the parameter dict built in setup_methods()
# and handle_sample_type().
METHOD_TEMPLATE = """<UISETTINGS>
<UI_SAMPLE_TYPE>$sample_type</UI_SAMPLE_TYPE>
<UI_QUANT_TYPE>$quant_type</UI_QUANT_TYPE>
<UI_BACKGROUND_CORRECTION>$background_correction</UI_BACKGROUND_CORRECTION>
<UI_BIAS_CORRECTION>$bias_correction</UI_BIAS_CORRECTION>
<UI_CYS_ALKYLATION>$cys_alkylation</UI_CYS_ALKYLATION>
<UI_DIGESTION>$digestion</UI_DIGESTION>
<UI_SPECIAL_FACTOR>$special_factors</UI_SPECIAL_FACTOR>
<UI_INSTRUMENT>$instrument</UI_INSTRUMENT>
<UI_SPECIES></UI_SPECIES>
<UI_USER_NAME></UI_USER_NAME>
<UI_MACHINE_NAME></UI_MACHINE_NAME>
<UI_START_TIME></UI_START_TIME>
<UI_SEARCH_ID></UI_SEARCH_ID>
<UI_ID_FOCUS>$search_foci</UI_ID_FOCUS>
<UI_SEARCH_EFFORT>$search_effort</UI_SEARCH_EFFORT>
<UI_SEARCH_RESOURCE>$database_name</UI_SEARCH_RESOURCE>
<UI_MIN_UNUSED_PROTSCORE>$min_unused_protscore</UI_MIN_UNUSED_PROTSCORE>
<UI_PSPEP>$pspep</UI_PSPEP>
<UI_MAX_QUANT_LABELS>$max_quant_labels</UI_MAX_QUANT_LABELS>
$quant_labels
</UISETTINGS>
"""
|
|
115
|
|
# Maps a --sample_type value to the quant type written into the methods file.
# handle_sample_type() uses the sample type itself when it is not listed here.
quant_special_cases = {
    "iTRAQ 4plex (Peptide Labeled)": "iTRAQ4PLEX",
    "iTRAQ 4plex (Protein Labeled)": "iTRAQ4PLEX",
    "iTRAQ 8plex (Peptide Labeled)": "iTRAQ8PLEX",
    "iTRAQ 8plex (Protein Labeled)": "iTRAQ8PLEX",
    "mTRAQ (Peptide Labeled - M00, M04)": "mTRAQ_0-4",
    "mTRAQ (Peptide Labeled - M00, M08)": "mTRAQ_0-8",
    "mTRAQ (Peptide Labeled - M04, M08)": "mTRAQ_4-8",
    "mTRAQ (Peptide Labeled - M00, M04, M08)": "mTRAQ_0-4-8",
    "Proteolytic O-18 labeling": "Proteolytic O-18 v O-16",
    "Cleavable ICAT": "ICAT9",
    "ICPL Light, Heavy (Peptide Labeled)": "ICPL peptide",
    "ICPL Light, Heavy (Protein Labeled)": "ICPL protein",
}
|
|
130
|
|
131
|
1
|
def parse_groups(inputs_file, group_parts=("group",), input_parts=("name", "path")):
    """Parse a grouped input description file.

    Blank lines and lines starting with ``#`` are ignored. A line starting
    with ``"<group_parts[0]>:"`` opens a new group and consumes one line per
    group part; a line starting with ``"<input_parts[0]>:"`` consumes one line
    per input part and appends the values to the current group. Any other
    line is skipped.

    Args:
        inputs_file: path of the file to read.
        group_parts: field names making up a group header, in file order.
        input_parts: field names making up one input entry, in file order.

    Returns:
        dict mapping group name to ``{"group_data": {...}, "inputs": [[...]]}``.
        Inputs listed before any group header are stored under the key
        ``None`` with empty ``group_data``. Groups without inputs are omitted.

    Raises:
        IndexError: if the file ends in the middle of a multi-line entry.
    """
    with open(inputs_file, "r") as inputs_handle:
        stripped_lines = [line.strip() for line in inputs_handle]
    inputs_lines = [line for line in stripped_lines if line and not line.startswith("#")]
    group_prefixes = ["%s:" % group_part for group_part in group_parts]
    input_prefixes = ["%s:" % input_part for input_part in input_parts]
    groups = {}
    cur_group = None
    # Defined up front so an input appearing before any group header does not
    # hit an unbound local.
    group_data = {}
    i = 0
    while i < len(inputs_lines):
        line = inputs_lines[i]
        if line.startswith(group_prefixes[0]):
            # Start new group
            cur_group = line[len(group_prefixes[0]):]
            group_data = {}
            for j, group_prefix in enumerate(group_prefixes):
                group_data[group_parts[j]] = inputs_lines[i + j][len(group_prefix):]
            i += len(group_prefixes)
        elif line.startswith(input_prefixes[0]):
            entry = [inputs_lines[i + j][len(input_prefix):]
                     for j, input_prefix in enumerate(input_prefixes)]
            if cur_group not in groups:
                groups[cur_group] = {"group_data": group_data, "inputs": []}
            groups[cur_group]["inputs"].append(entry)
            i += len(input_prefixes)
        else:
            # Skip unrecognized line
            i += 1
    return groups
|
|
164
|
|
165
|
0
|
def get_env_property(name, default):
    """Return environment variable ``name``, or ``default`` when it is unset."""
    return os.environ.get(name, default)
|
|
171
|
|
172
|
|
def build_quant_label(reagent, quant_type="Not Used", treatment="", minus2="0", minus1="0", plus1="0", plus2="0"):
    """Bundle the attributes of one quant label into a dict.

    The keys match the placeholders used by join_quant_labels(); note that
    ``quant_type`` is stored under the key ``"type"``.
    """
    label = dict(reagent=reagent, type=quant_type, treatment=treatment)
    label.update(minus2=minus2, minus1=minus1, plus1=plus1, plus2=plus2)
    return label
|
|
183
|
|
184
|
|
def build_quant_labels(options, quant_type):
    """Return the list of quant label dicts for ``quant_type``.

    Labels are defined for "iTRAQ8PLEX" and "iTRAQ4PLEX"; any other quant
    type yields an empty list. ``options`` is accepted but not used.
    """
    # Per quant type: (reagent, non-default minus2/minus1/plus1/plus2 values).
    label_table = {
        "iTRAQ8PLEX": (
            ("iTRAQ113", {"plus1": "6.89", "plus2": "0.24"}),
            ("iTRAQ114", {"minus1": "0.94", "plus1": "5.9", "plus2": "0.16"}),
            ("iTRAQ115", {"minus1": "1.88", "plus1": "4.9", "plus2": "0.1"}),
            ("iTRAQ116", {"minus1": "2.82", "plus1": "3.9", "plus2": "0.07"}),
            ("iTRAQ117", {"minus2": "0.06", "minus1": "3.77", "plus1": "2.88"}),
            ("iTRAQ118", {"minus2": "0.09", "minus1": "4.71", "plus1": "1.91"}),
            ("iTRAQ119", {"minus2": "0.14", "minus1": "5.66", "plus1": "0.87"}),
            ("iTRAQ121", {"minus2": "0.27", "minus1": "7.44", "plus1": "0.18"}),
        ),
        "iTRAQ4PLEX": (
            ("iTRAQ114", {"minus1": "1.00", "plus1": "5.9", "plus2": "0.20"}),
            ("iTRAQ115", {"minus1": "2.00", "plus1": "5.6", "plus2": "0.1"}),
            ("iTRAQ116", {"minus1": "3.00", "plus1": "4.5", "plus2": "0.1"}),
            ("iTRAQ117", {"minus2": "0.10", "minus1": "4.00", "plus1": "3.50", "plus2": "0.1"}),
        ),
    }
    return [
        build_quant_label(reagent, **overrides)
        for reagent, overrides in label_table.get(quant_type, ())
    ]
|
|
206
|
|
207
|
|
def join_quant_labels(labels):
    """Render each label dict as a QUANT_LABEL_SETTING element, one per line.

    Each dict must provide the keys reagent, type, treatment, minus2, minus1,
    plus1 and plus2. An empty ``labels`` sequence yields an empty string.
    """
    label_template = Template('<QUANT_LABEL_SETTING reagent="$reagent" type="$type" treatment="$treatment" minus2="$minus2" minus1="$minus1" plus1="$plus1" plus2="$plus2"/>')
    rendered = []
    for quant_label in labels:
        rendered.append(label_template.substitute(quant_label))
    return "\n".join(rendered)
|
|
211
|
|
212
|
|
def handle_sample_type(options, parameter_dict):
    """Fill the sample/quant entries of ``parameter_dict`` from ``options``.

    Sets "sample_type", "quant_type" and "quant_labels". The quant type is
    looked up in ``quant_special_cases`` (falling back to the sample type
    itself) and is blanked unless ``options.quantitative`` equals "TRUE"
    case-insensitively.
    """
    sample_type = options.sample_type
    quant_type = quant_special_cases.get(sample_type, sample_type)
    is_quantitative = options.quantitative.upper() == "TRUE"
    if not is_quantitative:
        quant_type = ""
    parameter_dict["sample_type"] = sample_type
    parameter_dict["quant_type"] = quant_type
    labels = build_quant_labels(options, quant_type)
    parameter_dict["quant_labels"] = join_quant_labels(labels)
|
|
224
|
|
225
|
|
def setup_database(options):
    """Link the search database into the ProteinPilot database folder.

    The folder comes from the PROTEIN_PILOT_DATABASE_FOLDER environment
    variable (with a built-in default). The link is named
    ``gx_<database name without extension>_<timestamp>.fasta``, with spaces
    in the database name replaced by underscores.

    Returns:
        ``(link path, link file name without extension)``.
    """
    database_dir = get_env_property("PROTEIN_PILOT_DATABASE_FOLDER", "C:\\AB SCIEX\\ProteinPilot Data\\SearchDatabases")
    source_path = options.database
    underscored_name = options.database_name.replace(" ", "_")
    name_without_extension = os.path.splitext(underscored_name)[0]
    link_base = os.path.join(database_dir, "gx_%s" % name_without_extension)
    database_destination = get_unique_path(link_base, ".fasta")
    symlink(source_path, database_destination)
    destination_stem = os.path.splitext(database_destination)[0]
    return (database_destination, os.path.basename(destination_stem))
|
|
236
|
|
237
|
|
def extract_list(parameter):
    """Convert a comma-separated option value to a semicolon-separated one.

    ``None`` and the literal string "None" are treated as an empty list and
    yield an empty string.
    """
    if parameter is None or parameter == "None":
        return ""
    return parameter.replace(",", ";")
|
|
242
|
|
243
|
|
def setup_methods(options):
    """Write the methods XML file and link the search database.

    The file is rendered from ``METHOD_TEMPLATE`` and written to
    ``gx_<current directory name>.xml`` inside the folder given by the
    PROTEIN_PILOT_METHODS_FOLDER environment variable (with a built-in
    default).

    Returns:
        ``(methods_name, methods_path, database_path)``.
    """
    (database_path, database_name) = setup_database(options)
    special_factors = extract_list(options.special_factors)
    search_foci = extract_list(options.search_foci)
    method_parameters = {
        "background_correction": options.background_correction,
        "bias_correction": options.bias_correction,
        "cys_alkylation": options.cys_alkylation,
        "digestion": options.digestion,
        "instrument": options.instrument,
        "search_effort": options.search_effort,
        "search_foci": search_foci,
        "pspep": options.pspep,
        "min_unused_protscore": options.min_unused_protscore,
        "max_quant_labels": "3",
        "database_name": database_name,
        "quantitative": options.quantitative,
        "special_factors": special_factors,
    }
    # Adds sample_type, quant_type and quant_labels.
    handle_sample_type(options, method_parameters)
    method_contents = Template(METHOD_TEMPLATE).substitute(method_parameters)
    PROTEINPILOT_METHODS_DIR = get_env_property("PROTEIN_PILOT_METHODS_FOLDER", "C:\\ProgramData\\AB SCIEX\\ProteinPilot\\ParagonMethods\\")
    methods_name = "gx_%s" % os.path.split(os.getcwd())[-1]
    methods_path = os.path.join(PROTEINPILOT_METHODS_DIR, "%s.xml" % methods_name)
    # Close the handle explicitly: the file is moved or deleted by the caller.
    with open(methods_path, "w") as methods_file:
        methods_file.write(method_contents)
    return (methods_name, methods_path, database_path)
|
|
271
|
|
272
|
1
|
def setup_inputs(inputs):
    """Link every input into the working directory and describe it as XML.

    Each entry of ``inputs`` is ``[name, path]``. The name gets a ".mgf"
    suffix unless it already ends with one (case-insensitive), and a link to
    the path is created under that name. When ``DEBUG`` is set, a line with
    the path, name and size of each input is printed.

    Returns:
        The ``<DATA .../>`` elements for all links, joined with commas; an
        empty string when ``inputs`` is empty.
    """
    links = []
    for input_data in inputs:
        input_name, input_path = input_data[0], input_data[1]
        if DEBUG:
            print("Processing input %s with name %s and size %d" % (input_path, input_name, os.stat(input_path).st_size))
        has_mgf_suffix = input_name.upper().endswith(".MGF")
        if not has_mgf_suffix:
            input_name = "%s.mgf" % input_name
        link_path = os.path.abspath(input_name)
        symlink(input_path, link_path)
        links.append(link_path)
    data_elements = []
    for link in links:
        data_elements.append("<DATA type=\"MGF\" filename=\"%s\" />" % escape(link))
    return ",".join(data_elements)
|
0
|
286
|
|
287
|
|
def get_unique_path(base, extension):
    """Return ``base`` + "_" + current time in milliseconds + ``extension``.

    The path is only built, not created or checked for existence.
    """
    timestamp_ms = int(time.time() * 1000)
    return "%s_%d%s" % (base, timestamp_ms, extension)
|
|
292
|
|
293
|
|
def move_pspep_output(options, destination, suffix):
    """Move one false-positive-analysis CSV to ``destination``.

    The source file is ``<options.output>__FalsePositiveAnalysis__<suffix>.csv``.
    Nothing happens when ``destination`` is empty or None.
    """
    if not destination:
        return
    source = "%s__FalsePositiveAnalysis__%s.csv" % (options.output, suffix)
    shutil.move(source, destination)
|
|
298
|
|
299
|
|
def _build_option_parser():
    """Create the command-line parser holding all wrapper options."""
    parser = optparse.OptionParser()
    parser.add_option("--input_config")
    parser.add_option("--database")
    parser.add_option("--database_name")
    parser.add_option("--instrument")
    parser.add_option("--sample_type")  # TODO: Restrict values
    parser.add_option("--bias_correction", default="False")
    parser.add_option("--background_correction", default="False")
    parser.add_option("--cys_alkylation", default="None")
    parser.add_option("--digestion", default="Trypsin")
    parser.add_option("--special_factors", default="")
    parser.add_option("--search_foci", default="")
    parser.add_option("--search_effort", default="Rapid")
    parser.add_option("--min_unused_protscore", default="3")
    parser.add_option("--quantitative", default="False")
    parser.add_option("--pspep", default="TRUE")
    parser.add_option("--output")
    parser.add_option("--output_methods")
    #parser.add_option("--output_pspep_peptide", default="")
    #parser.add_option("--output_pspep_protein", default="")
    #parser.add_option("--output_pspep_spectra", default="")
    parser.add_option("--output_pspep_report", default="")
    return parser


def run_script():
    """Parse the command line, run ProteinPilot and collect its outputs.

    Steps: write the methods file and link the database (setup_methods), link
    the inputs and write ``input.xml`` in the current directory, run
    ``ProteinPilot.exe input.xml``, then move the group file, the optional
    FDR report and the methods file to the requested output paths. The
    database link and the methods file are removed afterwards in all cases.

    Raises:
        AssertionError: if the input configuration does not contain exactly
            one group.
        Exception: if ProteinPilot exits with a non-zero status (see execute()).
    """
    (options, _args) = _build_option_parser().parse_args()

    (methods_name, methods_path, database_path) = setup_methods(options)
    try:
        group_file = "%s.group" % options.output
        input_contents_template = """<PROTEINPILOTPARAMETERS>
<METHOD name="$methods_name" />
$inputs
<RESULT filename="$output" />
</PROTEINPILOTPARAMETERS>"""
        group_data = parse_groups(options.input_config)
        # list() keeps the indexing below valid on Python 3 as well.
        group_values = list(group_data.values())
        # Not using groups right now.
        assert len(group_values) == 1, len(group_values)
        inputs = group_values[0]["inputs"]
        input_parameters = {
            "inputs": setup_inputs(inputs),
            "output": group_file,
            "methods_name": methods_name,
        }

        input_contents = Template(input_contents_template).substitute(input_parameters)
        # Close input.xml before ProteinPilot is started on it.
        with open("input.xml", "w") as input_file:
            input_file.write(input_contents)

        protein_pilot_path = get_env_property("PROTEIN_PILOT_PATH", "")
        if protein_pilot_path and not protein_pilot_path.endswith("\\"):
            # Add the missing separator; the original formatted the path
            # without appending anything, gluing it to the executable name.
            protein_pilot_path = "%s\\" % protein_pilot_path
        # NOTE(review): the command is not quoted, so an install path that
        # contains spaces may still fail under shell=True - confirm.
        execute("%sProteinPilot.exe input.xml" % protein_pilot_path)
        shutil.move(group_file, options.output)
        #move_pspep_output(options, options.output_pspep_spectra, "SpectralLevelData")
        #move_pspep_output(options, options.output_pspep_peptide, "DistinctPeptideLevelData")
        #move_pspep_output(options, options.output_pspep_protein, "ProteinLevelData")
        if options.output_pspep_report:
            source = "%s__FDR.xlsx" % (options.output)
            shutil.move(source, options.output_pspep_report)
        shutil.move(methods_path, options.output_methods)
    finally:
        delete_file(database_path)
        delete_file(methods_path)
|
|
363
|
|
# Run the wrapper only when executed as a script, not when imported.
if __name__ == '__main__':
    __main__()
|