0
|
1 #!/usr/bin/env python
|
|
2 import optparse
|
|
3 import os
|
|
4 import shutil
|
|
5 import sys
|
|
6 import tempfile
|
|
7 import subprocess
|
|
8 import logging
|
|
9 from string import Template
|
|
10 from xml.sax.saxutils import escape
|
|
11
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# NOTE(review): DEBUG is not read anywhere in the code visible here.
DEBUG = True

# Scratch files for the child process's output are placed in the directory the
# script was started from.
working_directory = os.getcwd()
# Only the generated path is kept from each NamedTemporaryFile; the file object
# itself is not retained. execute() later opens these paths by name for writing.
tmp_stderr_name = tempfile.NamedTemporaryFile(dir=working_directory, suffix='.stderr').name
tmp_stdout_name = tempfile.NamedTemporaryFile(dir=working_directory, suffix='.stdout').name
|
|
19
|
|
20
|
|
def stop_err(msg):
    """Write *msg* to stderr and terminate with a failure exit status.

    :param msg: Text to report; a trailing newline is appended.
    :raises SystemExit: Always, with exit code 1.
    """
    sys.stderr.write("%s\n" % msg)
    # A bare sys.exit() would report success (status 0) right after printing
    # an error message, hiding the failure from whatever launched the script.
    sys.exit(1)
|
|
24
|
|
25
|
|
def read_stderr():
    """Return the contents of the captured-stderr scratch file as text.

    The file named by the module-level ``tmp_stderr_name`` is read in 1 MiB
    chunks until end of file. Bytes are decoded as UTF-8, with undecodable
    sequences replaced, so arbitrary program output can never raise here.

    :returns: The decoded stderr text, or ``''`` if the file does not exist.
    """
    if not os.path.exists(tmp_stderr_name):
        return ''
    chunks = []
    buffsize = 1048576
    with open(tmp_stderr_name, 'rb') as tmp_stderr:
        try:
            while True:
                chunk = tmp_stderr.read(buffsize)
                # An empty read is the only reliable end-of-file signal; the
                # accumulated length says nothing when the file size is an
                # exact multiple of the buffer size.
                if not chunk:
                    break
                chunks.append(chunk)
        except OverflowError:
            # Kept from the original: give back whatever was read so far.
            pass
    return b''.join(chunks).decode('utf-8', 'replace')
|
|
39
|
|
40
|
|
def execute(command, stdin=None):
    """Run *command* through the shell, capturing its output in scratch files.

    stdout and stderr of the child are redirected to the files named by the
    module-level ``tmp_stdout_name`` / ``tmp_stderr_name``. Whatever happens,
    up to 64000 characters of each captured stream are echoed to this
    process's stdout afterwards (stderr first, then stdout).

    :param command: Shell command line. NOTE: it is handed to the shell
        verbatim (``shell=True``), so callers are responsible for quoting any
        untrusted or space-containing arguments.
    :param stdin: Optional file object or descriptor used as the child's stdin.
    :raises Exception: If the command exits with a non-zero status; the
        message includes the exit code and the captured stderr.
    """
    try:
        with open(tmp_stderr_name, 'wb') as tmp_stderr:
            with open(tmp_stdout_name, 'wb') as tmp_stdout:
                proc = subprocess.Popen(args=command, shell=True, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno(), stdin=stdin, env=os.environ)
                returncode = proc.wait()
        if returncode != 0:
            raise Exception("Program returned with non-zero exit code %d. stderr: %s" % (returncode, read_stderr()))
    finally:
        for captured_name in (tmp_stderr_name, tmp_stdout_name):
            # If a scratch file could not even be created, skip it so the
            # original error is not masked by a second one raised here.
            if not os.path.exists(captured_name):
                continue
            # Context manager so the handle is closed instead of leaked.
            with open(captured_name, "r") as captured:
                print(captured.read(64000))
|
0
|
52
|
|
53
|
|
def delete_file(path):
    """Remove the file at *path*, ignoring filesystem errors (best effort).

    A missing file, a permission problem or *path* being a directory are all
    silently ignored.

    :param path: Path of the file to remove.
    """
    try:
        os.remove(path)
    except OSError:
        # Only filesystem failures are swallowed; a bare ``except`` would also
        # hide KeyboardInterrupt and SystemExit.
        pass
|
|
60
|
|
61
|
|
def delete_directory(directory):
    """Recursively remove *directory*, ignoring filesystem errors (best effort).

    Nothing happens if *directory* does not exist. Removal stops at the first
    filesystem error, which is then silently ignored.

    :param directory: Path of the directory tree to remove.
    """
    if not os.path.exists(directory):
        return
    try:
        shutil.rmtree(directory)
    except OSError:
        # Only filesystem failures are swallowed; a bare ``except`` would also
        # hide KeyboardInterrupt and SystemExit.
        pass
|
|
68
|
|
69
|
|
def symlink(source, link_name):
    """Make *link_name* refer to *source*, copying where linking is impossible.

    On non-Windows platforms this is a plain ``os.symlink``. On Windows the
    link is created through pywin32's ``win32file.CreateSymbolicLink``; if
    pywin32 is not installed or the call fails, *source* is copied to
    *link_name* instead.

    :param source: Existing path the link should point to.
    :param link_name: Path of the link (or copy) to create.
    """
    import platform
    if platform.system() != 'Windows':
        os.symlink(source, link_name)
        return
    try:
        import win32file
        # CreateSymbolicLink takes the path of the NEW link first and the
        # target second. The flag must be 1 only when the target is a
        # directory; 0 creates a file link.
        link_flags = 1 if os.path.isdir(source) else 0
        win32file.CreateSymbolicLink(link_name, source, link_flags)
    except Exception:
        # Missing pywin32, missing privilege, etc. -- fall back to a copy.
        shutil.copy(source, link_name)
|
|
80
|
|
81
|
|
def copy_to_working_directory(data_file, relative_path):
    """Copy *data_file* to *relative_path* unless both name the same location.

    :param data_file: Path of the file to copy.
    :param relative_path: Destination path.
    :returns: *relative_path*, unchanged.
    """
    already_in_place = os.path.abspath(data_file) == os.path.abspath(relative_path)
    if not already_in_place:
        shutil.copy(data_file, relative_path)
    return relative_path
|
|
86
|
|
87
|
|
def __main__():
    """Script entry point: hand control to :func:`run_script`."""
    run_script()
|
|
90
|
|
91
|
|
# Driver-file template for the "run" action. The $placeholders are filled in
# with string.Template.substitute() by execute_scaffold(); the values come from
# scaffold_run() plus the output path. thresholds="thresh" refers to the
# id="thresh" attribute emitted by build_display_thresholds().
# Extra database attributes: name, databaseAccessionRegEx, databaseDescriptionRegEx, decoyProteinRegEx
# Extra export types: protxml, spectrum-report, statistics, peptide-report, protein-report, experiment-report
RUN_TEMPLATE = """<Scaffold>
<Experiment name="Galaxy Scaffold Experiment">
<FastaDatabase id="database"
path="$database_path"
name="$database_name"
databaseAccessionRegEx="$database_accession_regex"
databaseDescriptionRegEx="$database_description_regex"
decoyProteinRegEx="$database_decoy_regex"
/>
$samples
$display_thresholds
<Export type="sf3" path="$output_path" thresholds="thresh" />
</Experiment>
</Scaffold>
"""
|
|
109
|
|
# Driver-file template for the "export" action. It loads an existing
# experiment from $sf3_path and writes one export to $output_path.
# $export_options is spliced in as raw attribute text (scaffold_export()
# supplies ` type="..." `), and thresholds="thresh" refers to the id="thresh"
# attribute emitted by build_display_thresholds().
EXPORT_TEMPLATE = """<Scaffold>
<Experiment load="$sf3_path">
$display_thresholds
<Export $export_options path="$output_path" thresholds="thresh" />
</Experiment>
</Scaffold>
"""
|
|
117
|
|
def parse_groups(inputs_file, group_parts=("group",), input_parts=("name", "path")):
    """Parse a line-oriented "groups of inputs" description file.

    Blank lines and lines starting with ``#`` are ignored. Each remaining line
    is expected to look like ``<part>:<value>``:

    * A line starting with the first group prefix (``group_parts[0] + ":"``)
      opens a new group. It and the following ``len(group_parts) - 1`` lines
      supply one value per entry of *group_parts*, in order.
    * A line starting with the first input prefix opens an input record. It
      and the following ``len(input_parts) - 1`` lines supply one value per
      entry of *input_parts*, in order.
    * Any other line is skipped.

    Only the first line of a group or record is checked against its prefix;
    follow-up lines are simply cut at the length of the expected prefix.

    :param inputs_file: Path of the description file.
    :param group_parts: Names of the per-group fields, in file order.
    :param input_parts: Names of the per-input fields, in file order.
    :returns: ``{group_name: {"group_data": {part: value}, "inputs": [[value, ...], ...]}}``.
        A group appears only once it has at least one input. When a group
        name repeats, the ``group_data`` of its first occurrence is kept.
        Inputs listed before any group header are collected under the key
        ``None`` with empty ``group_data``.
    :raises IndexError: If the file ends in the middle of a group header or
        input record.
    """
    # Context manager so the handle is closed instead of leaked.
    with open(inputs_file, "r") as handle:
        stripped_lines = [raw_line.strip() for raw_line in handle]
    inputs_lines = [line for line in stripped_lines if line and not line.startswith("#")]

    group_prefixes = ["%s:" % group_part for group_part in group_parts]
    input_prefixes = ["%s:" % input_part for input_part in input_parts]

    groups = {}
    cur_group = None
    # Initialised up front so an input record that precedes every group header
    # does not hit an unbound local.
    group_data = {}
    i = 0
    while i < len(inputs_lines):
        line = inputs_lines[i]
        if line.startswith(group_prefixes[0]):
            # Start new group
            cur_group = line[len(group_prefixes[0]):]
            group_data = {}
            for j, group_prefix in enumerate(group_prefixes):
                group_data[group_parts[j]] = inputs_lines[i + j][len(group_prefix):]
            i += len(group_prefixes)
        elif line.startswith(input_prefixes[0]):
            record = [
                inputs_lines[i + j][len(input_prefix):]
                for j, input_prefix in enumerate(input_prefixes)
            ]
            if cur_group not in groups:
                groups[cur_group] = {"group_data": group_data, "inputs": []}
            groups[cur_group]["inputs"].append(record)
            i += len(input_prefixes)
        else:
            # Unrecognised line: skip it
            i += 1
    return groups
|
|
150
|
|
151
|
|
def build_samples(samples_file):
    """Build the ``<BiologicalSample>`` XML fragment for the driver file.

    *samples_file* is parsed with :func:`parse_groups` using the group fields
    ``sample``/``mudpit``/``category`` and the input fields
    ``name``/``path``/``ext``. For every input, a link named after the input's
    base name (with ``.<ext>`` appended when the name does not already end in
    *ext*, case-insensitively) is created in the current directory via
    :func:`symlink`, and its absolute path is listed in an ``<InputFile>``
    element.

    All values are XML-escaped before being placed in the fragment, so names
    or paths containing ``&``, ``<``, ``>`` or ``"`` still give well-formed XML.

    :param samples_file: Path of the samples description file.
    :returns: The concatenated XML text, one ``<BiologicalSample>`` per group.
    """
    # escape() leaves double quotes alone by default; attribute values need
    # them replaced as well.
    attribute_entities = {'"': "&quot;"}
    group_data = parse_groups(samples_file, group_parts=["sample", "mudpit", "category"], input_parts=["name", "path", "ext"])
    fragments = []
    for sample_name, sample_data in group_data.items():
        mudpit = sample_data["group_data"]["mudpit"]
        category = sample_data["group_data"]["category"]
        fragments.append(
            """<BiologicalSample database="database" name="%s" mudpit="%s" category="%s">\n""" % (
                escape(sample_name, attribute_entities),
                escape(mudpit, attribute_entities),
                escape(category, attribute_entities),
            )
        )
        for (name, path, ext) in sample_data["inputs"]:
            name = os.path.basename(name)
            if not name.lower().endswith(ext.lower()):
                name = "%s.%s" % (name, ext)
            symlink(path, name)
            fragments.append("<InputFile>%s</InputFile>\n" % escape(os.path.abspath(name)))
        fragments.append("""</BiologicalSample>\n""")
    # Joined once at the end rather than growing a string with += in the loop.
    return "".join(fragments)
|
|
168
|
|
169
|
|
def run_script():
    """Dispatch to the sub-command named by the first command-line argument.

    ``run`` invokes :func:`scaffold_run` and ``export`` invokes
    :func:`scaffold_export`; each of them parses the remaining options itself.

    :raises SystemExit: With a usage message (exit status 1) when the action
        is missing or is not one of the supported names.
    """
    action = sys.argv[1] if len(sys.argv) > 1 else None
    if action not in ("run", "export"):
        # Fail with a readable message instead of an IndexError (no argument)
        # or an unbound-local error (unknown action).
        sys.exit("Usage: %s (run|export) [options]; got action %r" % (sys.argv[0], action))
    if action == "run":
        scaffold_run()
    else:
        scaffold_export()
|
|
177
|
|
178
|
|
def scaffold_export():
    """Handle the ``export`` action.

    Parses ``--sf3``, ``--output``, ``--export_type`` and the shared threshold
    options from the command line, then renders ``EXPORT_TEMPLATE`` and runs
    it through :func:`execute_scaffold`.
    """
    option_parser = optparse.OptionParser()
    for flag in ("--sf3", "--output", "--export_type"):
        option_parser.add_option(flag)
    populate_threshold_options(option_parser)
    options, _positional = option_parser.parse_args()

    template_parameters = {
        "sf3_path": options.sf3,
        "export_options": """ type="%s" """ % options.export_type,
        "display_thresholds": build_display_thresholds(options),
    }

    execute_scaffold(options, EXPORT_TEMPLATE, template_parameters)
|
|
194
|
|
195
|
|
def build_display_thresholds(options):
    """Render the ``<DisplayThresholds id="thresh">`` element as a string.

    Optional numeric thresholds are emitted as attributes only when the
    matching option is not ``None``; ``useCharge`` is always emitted. The
    element body is whatever :func:`tandem_opts` and :func:`omssa_opts`
    return, concatenated in that order.

    :param options: Parsed options object carrying the threshold attributes.
    :returns: The complete element, opening tag through closing tag.
    """
    # (attribute format, option attribute) pairs, in output order.
    optional_attributes = (
        ('proteinProbability="%s"', "protein_probability"),
        ('peptideProbability="%s"', "peptide_probability"),
        ('minimumPeptideCount="%s"', "minimum_peptide_count"),
        ('minimumPeptideLength="%s"', "minimum_peptide_length"),
        ('minimumNTT="%s"', "minimum_ntt"),
    )
    attributes = ['id="thresh"']
    for attribute_format, option_name in optional_attributes:
        value = getattr(options, option_name)
        if value is not None:
            attributes.append(attribute_format % value)
    attributes.append('useCharge="%s"' % build_use_charge_option(options))

    pieces = [
        "<DisplayThresholds " + " ".join(attributes) + ">",
        tandem_opts(options),
        omssa_opts(options),
        "</DisplayThresholds>",
    ]
    return "".join(pieces)
|
|
213
|
|
214
|
|
def tandem_opts(options):
    """Return the ``<TandemThresholds>`` element, or ``""`` when no score is set.

    The single ``options.tandem_score`` value is repeated four times in the
    ``logExpectScores`` attribute.

    :param options: Object exposing a ``tandem_score`` attribute.
    """
    score = options.tandem_score
    if not score:
        return ""
    return '<TandemThresholds logExpectScores="%s,%s,%s,%s" />' % (score, score, score, score)
|
|
221
|
|
222
|
|
def omssa_opts(options):
    """Return the OMSSA threshold markup; currently always the empty string.

    :param options: Parsed options object (not inspected).
    """
    return ""
|
|
225
|
|
226
|
|
def build_use_charge_option(options):
    """Return the ``useCharge`` value: four comma-separated ``true``/``false`` flags.

    The flags are read from ``options.use_charge_1`` .. ``options.use_charge_4``;
    a missing attribute counts as true.

    :param options: Object that may carry the ``use_charge_N`` attributes.
    """
    flags = [
        getattr(options, "use_charge_%s" % charge, True)
        for charge in ("1", "2", "3", "4")
    ]
    return ",".join("true" if flag else "false" for flag in flags)
|
|
233
|
|
234
|
|
def populate_threshold_options(option_parser):
    """Register the threshold-related command-line options on *option_parser*.

    Value options default to ``None``. Each ``--ignore_charge_N`` flag stores
    ``False`` into ``use_charge_N``, which otherwise defaults to ``True``.

    :param option_parser: The ``optparse.OptionParser`` to extend in place.
    """
    def add_value_options(names):
        for name in names:
            option_parser.add_option("--%s" % name, default=None)

    add_value_options(["protein_probability", "peptide_probability", "minimum_peptide_count"])
    for charge in ("1", "2", "3", "4"):
        option_parser.add_option(
            "--ignore_charge_%s" % charge,
            action="store_false",
            dest="use_charge_%s" % charge,
            default=True,
        )
    add_value_options([
        "minimum_peptide_length",
        "minimum_ntt",
        "tandem_score",
        "omssa_peptide_probability",
        "omssa_log_expect_score",
    ])
|
|
248
|
|
249
|
|
def database_rules(database_type):
    """Look up the FASTA header regular expressions for *database_type*.

    :param database_type: One of the known type names; anything else
        (including ``None``) falls back to the ``GENERIC`` rules.
    :returns: ``(accession_regex, description_regex)``.
    """
    generic_rules = (">([^ ]*)", ">[^ ]* (.*)")
    known_rules = {
        "ESTNR": (">(gi\\|[0-9]*)", ">[^ ]* (.*)"),
        "IPI": (">IPI:([^\\| .]*)", ">[^ ]* Tax_Id=[0-9]* (.*)"),
        "SWISSPROT": (">([^ ]*)", ">[^ ]* \\([^ ]*\\) (.*)"),
        "UNIPROT": (">[^ ]*\\|([^ ]*)", ">[^ ]*\\|[^ ]* (.*)"),
        "UNIREF": (">UniRef100_([^ ]*)", ">[^ ]* (.*)"),
        "ENSEMBL": (">(ENS[^ ]*)", ">[^ ]* (.*)"),
        "MSDB": (">([^ ]*)", ">[^ ]* (.*)"),
        "GENERIC": generic_rules,
    }
    return known_rules.get(database_type, generic_rules)
|
|
263
|
|
264
|
|
def _xml_attribute(value):
    """Return *value* escaped for a double-quoted XML attribute ('' for None)."""
    if value is None:
        return ""
    # escape() leaves double quotes alone unless told otherwise.
    return escape(str(value), {'"': "&quot;"})


def scaffold_run():
    """Handle the ``run`` action.

    Parses the sample, database and output options (plus the shared threshold
    options) from the command line, renders ``RUN_TEMPLATE`` and runs it
    through :func:`execute_scaffold`. When ``--output_driver`` is given, the
    generated ``driver.xml`` is copied to that path afterwards.

    Every database value is XML-escaped before it is placed in an attribute.
    An option that was not supplied is rendered as an empty attribute; in
    particular a missing ``--database_decoy_regex`` no longer crashes in
    ``escape()``.
    NOTE(review): confirm that an empty ``decoyProteinRegEx`` is acceptable to
    the Scaffold batch tool.
    """
    parser = optparse.OptionParser()
    parser.add_option("--samples")
    parser.add_option("--database")
    parser.add_option("--database_name")
    parser.add_option("--database_type")
    parser.add_option("--database_decoy_regex")
    parser.add_option("--output")
    parser.add_option("--output_driver")
    populate_threshold_options(parser)
    (options, args) = parser.parse_args()

    template_parameters = {}

    # Read samples from config file and convert to XML
    template_parameters["samples"] = build_samples(options.samples)
    template_parameters["display_thresholds"] = build_display_thresholds(options)

    # Setup database parameters; an unknown or missing type uses GENERIC rules
    (accession_regex, description_regex) = database_rules(options.database_type)

    template_parameters["database_path"] = _xml_attribute(options.database)
    template_parameters["database_name"] = _xml_attribute(options.database_name)
    template_parameters["database_accession_regex"] = _xml_attribute(accession_regex)
    template_parameters["database_description_regex"] = _xml_attribute(description_regex)
    template_parameters["database_decoy_regex"] = _xml_attribute(options.database_decoy_regex)

    execute_scaffold(options, RUN_TEMPLATE, template_parameters)

    if options.output_driver:
        # execute_scaffold() writes driver.xml into the current directory.
        shutil.copy("driver.xml", options.output_driver)
|
|
301
|
|
302
|
|
def execute_scaffold(options, template, template_parameters):
    """Render a driver file from *template* and run ``ScaffoldBatch3`` on it.

    The rendered driver is printed to stdout, written to ``driver.xml`` in the
    current directory, and passed to ``ScaffoldBatch3`` through
    :func:`execute`.

    :param options: Parsed options; ``options.output`` becomes the
        ``$output_path`` template value (XML-escaped).
    :param template: ``string.Template`` source text for the driver file.
    :param template_parameters: Placeholder values. NOTE: this dict is
        modified in place (``output_path`` is added).
    :raises KeyError: If *template* uses a placeholder that is not supplied.
    """
    # shlex.quote where available; pipes.quote is its older spelling.
    try:
        from shlex import quote as shell_quote
    except ImportError:
        from pipes import quote as shell_quote

    # Setup output parameter (escaped because it lands in an XML attribute)
    template_parameters["output_path"] = escape(str(options.output), {'"': "&quot;"})

    # Prepare and create driver file
    driver_contents = Template(template).substitute(template_parameters)
    print(driver_contents)
    driver_path = os.path.abspath("driver.xml")
    # Context manager so the file is flushed and closed before Scaffold reads it.
    with open(driver_path, "w") as driver_file:
        driver_file.write(driver_contents)

    # Run Scaffold. The path is shell-quoted properly, so a working directory
    # containing quotes or other shell metacharacters cannot break the command.
    execute("ScaffoldBatch3 %s" % shell_quote(driver_path))
|
|
316
|
|
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == '__main__':
    __main__()
|