comparison proteinpilot_wrapper.py @ 1:790d80981060

Update.
author galaxyp
date Fri, 10 May 2013 18:04:15 -0400
parents 7dcb26ce559c
children
comparison
equal deleted inserted replaced
0:7dcb26ce559c 1:790d80981060
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 import optparse 2 import optparse
3 import os 3 import os
4 import sys 4 import sys
5 import tempfile 5 import tempfile
6 import shutil
7 import subprocess 6 import subprocess
8 import re
9 import time 7 import time
10 from os.path import basename 8 import shutil
11 import logging 9 import logging
12 10 from xml.sax.saxutils import escape
13 assert sys.version_info[:2] >= ( 2, 6 )
14 11
15 log = logging.getLogger(__name__) 12 log = logging.getLogger(__name__)
16 13
17 DEBUG = True 14 DEBUG = True
18 15
19 working_directory = os.getcwd() 16 working_directory = os.getcwd()
20 tmp_stderr_name = tempfile.NamedTemporaryFile(dir = working_directory, suffix = '.stderr').name 17 tmp_stderr_name = tempfile.NamedTemporaryFile(dir=working_directory, suffix='.stderr').name
21 tmp_stdout_name = tempfile.NamedTemporaryFile(dir = working_directory, suffix = '.stdout').name 18 tmp_stdout_name = tempfile.NamedTemporaryFile(dir=working_directory, suffix='.stdout').name
22 19
def stop_err(msg):
    """Write ``msg`` to stderr and terminate the process with a failure status.

    The message is written followed by a newline. The process then exits with
    status 1 so that callers inspecting the exit code (and not only stderr)
    see the run as failed.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write("%s\n" % msg)
    # A bare sys.exit() would report success (status 0) on an error path.
    sys.exit(1)
26 24
27 25
28 def read_stderr(): 26 def read_stderr():
29 stderr = '' 27 stderr = ''
127 "Proteolytic O-18 labeling": "Proteolytic O-18 v O-16", 125 "Proteolytic O-18 labeling": "Proteolytic O-18 v O-16",
128 "Cleavable ICAT": "ICAT9", 126 "Cleavable ICAT": "ICAT9",
129 "ICPL Light, Heavy (Peptide Labeled)": "ICPL peptide", 127 "ICPL Light, Heavy (Peptide Labeled)": "ICPL peptide",
130 "ICPL Light, Heavy (Protein Labeled)": "ICPL protein", 128 "ICPL Light, Heavy (Protein Labeled)": "ICPL protein",
131 } 129 }
130
131
def parse_groups(inputs_file, group_parts=("group",), input_parts=("name", "path")):
    """Parse a grouped input description file into a dictionary.

    The file is read line by line; every line is stripped, and blank lines and
    lines starting with ``#`` are dropped. The remaining lines are interpreted
    as records:

    * a group record starts with ``"<group_parts[0]>:"`` and spans
      ``len(group_parts)`` consecutive lines, one per entry of ``group_parts``;
    * an input record starts with ``"<input_parts[0]>:"`` and spans
      ``len(input_parts)`` consecutive lines, one per entry of ``input_parts``.

    Any other line is skipped.

    Args:
        inputs_file: path of the description file to read.
        group_parts: ordered field names making up a group record.
        input_parts: ordered field names making up an input record.

    Returns:
        A dict keyed by group name (the text after the first group prefix).
        Each value is ``{"group_data": {field: value}, "inputs": [[v1, v2, ...]]}``.
        A group only appears once it has at least one input. Inputs that occur
        before any group header are collected under the key ``None`` with an
        empty ``group_data``.

    Raises:
        ValueError: if a record is cut short by the end of the file or one of
            its lines does not start with the expected ``"<field>:"`` prefix.
    """
    with open(inputs_file, "r") as handle:
        stripped = [line.strip() for line in handle]
    lines = [line for line in stripped if line and not line.startswith("#")]

    group_prefixes = ["%s:" % part for part in group_parts]
    input_prefixes = ["%s:" % part for part in input_parts]

    groups = {}
    cur_group = None
    # Bound up front so an input record preceding every group header does not
    # hit an unbound local.
    group_data = {}
    i = 0
    while i < len(lines):
        line = lines[i]
        if line.startswith(group_prefixes[0]):
            # Start new group
            values = _read_record(lines, i, group_prefixes)
            cur_group = values[0]
            group_data = dict(zip(group_parts, values))
            i += len(group_prefixes)
        elif line.startswith(input_prefixes[0]):
            values = _read_record(lines, i, input_prefixes)
            if cur_group not in groups:
                groups[cur_group] = {"group_data": group_data, "inputs": []}
            groups[cur_group]["inputs"].append(values)
            i += len(input_prefixes)
        else:
            # Unrecognised line: skip it
            i += 1
    return groups


def _read_record(lines, start, prefixes):
    """Return the values of one multi-line record starting at ``lines[start]``."""
    record = lines[start:start + len(prefixes)]
    if len(record) < len(prefixes):
        raise ValueError(
            "Incomplete record starting at %r: expected %d lines (%s)"
            % (lines[start], len(prefixes), ", ".join(prefixes))
        )
    values = []
    for record_line, prefix in zip(record, prefixes):
        if not record_line.startswith(prefix):
            raise ValueError(
                "Malformed record: expected a line starting with %r but found %r"
                % (prefix, record_line)
            )
        values.append(record_line[len(prefix):])
    return values
132 164
133 165
134 def get_env_property(name, default): 166 def get_env_property(name, default):
135 if name in os.environ: 167 if name in os.environ:
136 return os.environ[name] 168 return os.environ[name]
236 methods_path = os.path.join(PROTEINPILOT_METHODS_DIR, "%s.xml" % methods_name) 268 methods_path = os.path.join(PROTEINPILOT_METHODS_DIR, "%s.xml" % methods_name)
237 open(methods_path, "w").write(method_contents) 269 open(methods_path, "w").write(method_contents)
238 return (methods_name, methods_path, database_path) 270 return (methods_name, methods_path, database_path)
239 271
240 272
def setup_inputs(inputs):
    """Link each input under an ``.mgf`` name and build the ``<DATA>`` elements.

    Args:
        inputs: iterable of ``[name, path]`` pairs (index 0 is the display
            name, index 1 is the existing file path).

    Returns:
        A string of ``<DATA type="MGF" filename="..." />`` elements, one per
        input, joined with commas, where each filename is the absolute path of
        the link created for that input.

    For every input, ``.mgf`` is appended to the name unless it already ends
    with that extension (case-insensitive). The name is made absolute against
    the current working directory, and ``symlink(path, link_path)`` is called
    (presumably creating a link to the original file; confirm against the
    helper's definition elsewhere in this file).
    """
    # NOTE(review): input names are used verbatim as relative paths; a name
    # containing path separators would place the link outside the working
    # directory. Confirm names are sanitised by the caller.
    links = []
    for input_data in inputs:
        input_name = input_data[0]
        input_path = input_data[1]
        if DEBUG:
            # Single-argument form prints identically on Python 2 and 3.
            print("Processing input %s with name %s and size %d" % (input_path, input_name, os.stat(input_path).st_size))
        if not input_name.upper().endswith(".MGF"):
            input_name = "%s.mgf" % input_name
        link_path = os.path.abspath(input_name)
        symlink(input_path, link_path)
        links.append(link_path)
    # escape() alone leaves double quotes untouched; the value sits inside a
    # double-quoted attribute, so quotes must be escaped as well.
    quote_entities = {'"': "&quot;"}
    return ",".join(["<DATA type=\"MGF\" filename=\"%s\" />" % escape(link, quote_entities) for link in links])
252 286
253 287
254 def get_unique_path(base, extension): 288 def get_unique_path(base, extension):
255 """ 289 """
256 """ 290 """
263 shutil.move(source, destination) 297 shutil.move(source, destination)
264 298
265 299
266 def run_script(): 300 def run_script():
267 parser = optparse.OptionParser() 301 parser = optparse.OptionParser()
268 parser.add_option("--input", dest="input", action="append", default=[]) 302 parser.add_option("--input_config")
269 parser.add_option("--input_name", dest="input_name", action="append", default=[])
270 parser.add_option("--database") 303 parser.add_option("--database")
271 parser.add_option("--database_name") 304 parser.add_option("--database_name")
272 parser.add_option("--instrument") 305 parser.add_option("--instrument")
273 parser.add_option("--sample_type") # TODO: Restrict values 306 parser.add_option("--sample_type") # TODO: Restrict values
274 parser.add_option("--bias_correction", default="False") 307 parser.add_option("--bias_correction", default="False")
295 input_contents_template = """<PROTEINPILOTPARAMETERS> 328 input_contents_template = """<PROTEINPILOTPARAMETERS>
296 <METHOD name="$methods_name" /> 329 <METHOD name="$methods_name" />
297 $inputs 330 $inputs
298 <RESULT filename="$output" /> 331 <RESULT filename="$output" />
299 </PROTEINPILOTPARAMETERS>""" 332 </PROTEINPILOTPARAMETERS>"""
333 input_config = options.input_config
334 group_data = parse_groups(input_config)
335 group_values = group_data.values()
336 # Not using groups right now.
337 assert len(group_values) == 1, len(group_values)
338 inputs = group_data.values()[0]["inputs"]
300 input_parameters = { 339 input_parameters = {
301 "inputs": setup_inputs(options.input, options.input_name), 340 "inputs": setup_inputs(inputs),
302 "output": group_file, 341 "output": group_file,
303 "methods_name": methods_name 342 "methods_name": methods_name
304 } 343 }
305 344
306 input_contents = Template(input_contents_template).substitute(input_parameters) 345 input_contents = Template(input_contents_template).substitute(input_parameters)