Mercurial > repos > galaxyp > proteinpilot
comparison proteinpilot_wrapper.py @ 1:790d80981060
Update.
author | galaxyp |
---|---|
date | Fri, 10 May 2013 18:04:15 -0400 |
parents | 7dcb26ce559c |
children |
comparison
equal
deleted
inserted
replaced
0:7dcb26ce559c | 1:790d80981060 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 import optparse | 2 import optparse |
3 import os | 3 import os |
4 import sys | 4 import sys |
5 import tempfile | 5 import tempfile |
6 import shutil | |
7 import subprocess | 6 import subprocess |
8 import re | |
9 import time | 7 import time |
10 from os.path import basename | 8 import shutil |
11 import logging | 9 import logging |
12 | 10 from xml.sax.saxutils import escape |
13 assert sys.version_info[:2] >= ( 2, 6 ) | |
14 | 11 |
15 log = logging.getLogger(__name__) | 12 log = logging.getLogger(__name__) |
16 | 13 |
17 DEBUG = True | 14 DEBUG = True |
18 | 15 |
19 working_directory = os.getcwd() | 16 working_directory = os.getcwd() |
20 tmp_stderr_name = tempfile.NamedTemporaryFile(dir = working_directory, suffix = '.stderr').name | 17 tmp_stderr_name = tempfile.NamedTemporaryFile(dir=working_directory, suffix='.stderr').name |
21 tmp_stdout_name = tempfile.NamedTemporaryFile(dir = working_directory, suffix = '.stdout').name | 18 tmp_stdout_name = tempfile.NamedTemporaryFile(dir=working_directory, suffix='.stdout').name |
22 | 19 |
23 def stop_err( msg ): | 20 |
24 sys.stderr.write( "%s\n" % msg ) | 21 def stop_err(msg): |
| | 22 sys.stderr.write("%s\n" % msg) |
25 sys.exit() | 23 sys.exit() |
26 | 24 |
27 | 25 |
28 def read_stderr(): | 26 def read_stderr(): |
29 stderr = '' | 27 stderr = '' |
127 "Proteolytic O-18 labeling": "Proteolytic O-18 v O-16", | 125 "Proteolytic O-18 labeling": "Proteolytic O-18 v O-16", |
128 "Cleavable ICAT": "ICAT9", | 126 "Cleavable ICAT": "ICAT9", |
129 "ICPL Light, Heavy (Peptide Labeled)": "ICPL peptide", | 127 "ICPL Light, Heavy (Peptide Labeled)": "ICPL peptide", |
130 "ICPL Light, Heavy (Protein Labeled)": "ICPL protein", | 128 "ICPL Light, Heavy (Protein Labeled)": "ICPL protein", |
131 } | 129 } |
| | 130 |
| | 131 |
| | 132 def parse_groups(inputs_file, group_parts=["group"], input_parts=["name", "path"]): |
| | 133 inputs_lines = [line.strip() for line in open(inputs_file, "r").readlines()] |
| | 134 inputs_lines = [line for line in inputs_lines if line and not line.startswith("#")] |
| | 135 cur_group = None |
| | 136 i = 0 |
| | 137 group_prefixes = ["%s:" % group_part for group_part in group_parts] |
| | 138 input_prefixes = ["%s:" % input_part for input_part in input_parts] |
| | 139 groups = {} |
| | 140 while i < len(inputs_lines): |
| | 141 line = inputs_lines[i] |
| | 142 if line.startswith(group_prefixes[0]): |
| | 143 # Start new group |
| | 144 cur_group = line[len(group_prefixes[0]):] |
| | 145 group_data = {} |
| | 146 for j, group_prefix in enumerate(group_prefixes): |
| | 147 group_line = inputs_lines[i + j] |
| | 148 group_data[group_parts[j]] = group_line[len(group_prefix):] |
| | 149 i += len(group_prefixes) |
| | 150 elif line.startswith(input_prefixes[0]): |
| | 151 input = [] |
| | 152 for j, input_prefix in enumerate(input_prefixes): |
| | 153 part_line = inputs_lines[i + j] |
| | 154 part = part_line[len(input_prefixes[j]):] |
| | 155 input.append(part) |
| | 156 if cur_group not in groups: |
| | 157 groups[cur_group] = {"group_data": group_data, "inputs": []} |
| | 158 groups[cur_group]["inputs"].append(input) |
| | 159 i += len(input_prefixes) |
| | 160 else: |
| | 161 # Skip empty line |
| | 162 i += 1 |
| | 163 return groups |
132 | 164 |
133 | 165 |
134 def get_env_property(name, default): | 166 def get_env_property(name, default): |
135 if name in os.environ: | 167 if name in os.environ: |
136 return os.environ[name] | 168 return os.environ[name] |
236 methods_path = os.path.join(PROTEINPILOT_METHODS_DIR, "%s.xml" % methods_name) | 268 methods_path = os.path.join(PROTEINPILOT_METHODS_DIR, "%s.xml" % methods_name) |
237 open(methods_path, "w").write(method_contents) | 269 open(methods_path, "w").write(method_contents) |
238 return (methods_name, methods_path, database_path) | 270 return (methods_name, methods_path, database_path) |
239 | 271 |
240 | 272 |
241 def setup_inputs(inputs, input_names): | 273 def setup_inputs(inputs): |
242 links = [] | 274 links = [] |
243 for input, input_name in zip(inputs, input_names): | 275 for input_data in inputs: |
| | 276 input_name = input_data[0] |
| | 277 input = input_data[1] |
244 if DEBUG: | 278 if DEBUG: |
245 print "Processing input %s with name %s and size %d" % (input, input_name, os.stat(input).st_size) | 279 print "Processing input %s with name %s and size %d" % (input, input_name, os.stat(input).st_size) |
246 if not input_name.upper().endswith(".MGF"): | 280 if not input_name.upper().endswith(".MGF"): |
247 input_name = "%s.mgf" % input_name | 281 input_name = "%s.mgf" % input_name |
248 link_path = os.path.abspath(input_name) | 282 link_path = os.path.abspath(input_name) |
249 symlink(input, link_path) | 283 symlink(input, link_path) |
250 links.append(link_path) | 284 links.append(link_path) |
251 return ",".join(["<DATA type=\"MGF\" filename=\"%s\" />" % link for link in links]) | 285 return ",".join(["<DATA type=\"MGF\" filename=\"%s\" />" % escape(link) for link in links]) |
252 | 286 |
253 | 287 |
254 def get_unique_path(base, extension): | 288 def get_unique_path(base, extension): |
255 """ | 289 """ |
256 """ | 290 """ |
263 shutil.move(source, destination) | 297 shutil.move(source, destination) |
264 | 298 |
265 | 299 |
266 def run_script(): | 300 def run_script(): |
267 parser = optparse.OptionParser() | 301 parser = optparse.OptionParser() |
268 parser.add_option("--input", dest="input", action="append", default=[]) | 302 parser.add_option("--input_config") |
269 parser.add_option("--input_name", dest="input_name", action="append", default=[]) | |
270 parser.add_option("--database") | 303 parser.add_option("--database") |
271 parser.add_option("--database_name") | 304 parser.add_option("--database_name") |
272 parser.add_option("--instrument") | 305 parser.add_option("--instrument") |
273 parser.add_option("--sample_type") # TODO: Restrict values | 306 parser.add_option("--sample_type") # TODO: Restrict values |
274 parser.add_option("--bias_correction", default="False") | 307 parser.add_option("--bias_correction", default="False") |
295 input_contents_template = """<PROTEINPILOTPARAMETERS> | 328 input_contents_template = """<PROTEINPILOTPARAMETERS> |
296 <METHOD name="$methods_name" /> | 329 <METHOD name="$methods_name" /> |
297 $inputs | 330 $inputs |
298 <RESULT filename="$output" /> | 331 <RESULT filename="$output" /> |
299 </PROTEINPILOTPARAMETERS>""" | 332 </PROTEINPILOTPARAMETERS>""" |
| | 333 input_config = options.input_config |
| | 334 group_data = parse_groups(input_config) |
| | 335 group_values = group_data.values() |
| | 336 # Not using groups right now. |
| | 337 assert len(group_values) == 1, len(group_values) |
| | 338 inputs = group_data.values()[0]["inputs"] |
300 input_parameters = { | 339 input_parameters = { |
301 "inputs": setup_inputs(options.input, options.input_name), | 340 "inputs": setup_inputs(inputs), |
302 "output": group_file, | 341 "output": group_file, |
303 "methods_name": methods_name | 342 "methods_name": methods_name |
304 } | 343 } |
305 | 344 |
306 input_contents = Template(input_contents_template).substitute(input_parameters) | 345 input_contents = Template(input_contents_template).substitute(input_parameters) |