annotate scaffold_wrapper.py @ 1:0d0cbb69a03f draft

Uploaded
author galaxyp
date Fri, 26 Sep 2014 15:10:34 -0400
parents e9981e6af666
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
2 import optparse
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
3 import os
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
4 import shutil
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
5 import sys
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
6 import tempfile
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
7 import subprocess
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
8 import logging
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
9 from string import Template
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
10 from xml.sax.saxutils import escape
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
11
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
# Logger named after this module.
log = logging.getLogger(__name__)

DEBUG = True

# Scratch files for the child process' output are placed in the directory
# the wrapper was started from.
working_directory = os.getcwd()


def _unused_temp_path(suffix):
    """Return a fresh path inside ``working_directory`` ending in *suffix*.

    The temporary file that reserves the name is removed again as soon as the
    ``with`` block exits, so only the path itself survives.
    """
    with tempfile.NamedTemporaryFile(dir=working_directory, suffix=suffix) as handle:
        return handle.name


tmp_stderr_name = _unused_temp_path('.stderr')
tmp_stdout_name = _unused_temp_path('.stdout')
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
19
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
20
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def stop_err(msg):
    """Report *msg* on stderr and terminate the process with exit status 1.

    A bare ``sys.exit()`` exits with status 0, which tells the caller the run
    succeeded even though an error was just reported; exit non-zero instead.
    """
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
24
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
25
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def read_stderr():
    """Return the text captured in the ``tmp_stderr_name`` scratch file.

    Returns an empty string when the file does not exist.  The file is read
    as bytes in 1 MiB chunks until an empty chunk signals end-of-file, then
    decoded once; undecodable bytes are replaced rather than raising.
    """
    if not os.path.exists(tmp_stderr_name):
        return ''
    buffsize = 1048576
    chunks = []
    with open(tmp_stderr_name, 'rb') as tmp_stderr:
        try:
            while True:
                chunk = tmp_stderr.read(buffsize)
                # Stop on the empty read.  Testing the accumulated length
                # against the buffer size never terminates when the file size
                # is an exact multiple of ``buffsize``.
                if not chunk:
                    break
                chunks.append(chunk)
        except OverflowError:
            # Keep whatever was read before the overflow.
            pass
    return b''.join(chunks).decode('utf-8', 'replace')
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
39
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
40
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def execute(command, stdin=None):
    """Run *command* through the shell and wait for it to finish.

    The child's stdout and stderr are redirected into the scratch files named
    by ``tmp_stdout_name`` and ``tmp_stderr_name``.  Whatever happens, up to
    64000 characters of each captured stream are echoed to this process'
    stdout afterwards.

    :param command: shell command line (executed with ``shell=True``).
    :param stdin: optional file object or descriptor passed to the child.
    :raises Exception: if the child exits with a non-zero status; the message
        carries the exit code and the captured stderr.
    """
    try:
        with open(tmp_stderr_name, 'wb') as tmp_stderr:
            with open(tmp_stdout_name, 'wb') as tmp_stdout:
                proc = subprocess.Popen(args=command, shell=True, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno(), stdin=stdin, env=os.environ)
                returncode = proc.wait()
                if returncode != 0:
                    raise Exception("Program returned with non-zero exit code %d. stderr: %s" % (returncode, read_stderr()))
    finally:
        for captured_name in (tmp_stderr_name, tmp_stdout_name):
            # A scratch file may be missing if opening it failed above; do
            # not let that mask the original error.
            if os.path.exists(captured_name):
                with open(captured_name, "r") as captured:
                    print(captured.read(64000))
0
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
52
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
53
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def delete_file(path):
    """Remove the file at *path* if it exists, ignoring filesystem errors.

    Only ``OSError`` is suppressed (removal is best-effort); a bare
    ``except`` would also hide ``KeyboardInterrupt`` and ``SystemExit``.
    """
    if os.path.exists(path):
        try:
            os.remove(path)
        except OSError:
            pass
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
60
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
61
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def delete_directory(directory):
    """Recursively remove *directory* if it exists, ignoring filesystem errors.

    Only ``OSError`` is suppressed (removal is best-effort); a bare
    ``except`` would also hide ``KeyboardInterrupt`` and ``SystemExit``.
    """
    if os.path.exists(directory):
        try:
            shutil.rmtree(directory)
        except OSError:
            pass
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
68
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
69
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def symlink(source, link_name):
    """Make *link_name* refer to *source*.

    On non-Windows platforms this is ``os.symlink``.  On Windows a symbolic
    link is attempted through ``win32file``; if that module is missing or the
    call fails, *source* is copied to *link_name* instead.
    """
    import platform
    if platform.system() != 'Windows':
        os.symlink(source, link_name)
        return
    try:
        import win32file
        # CreateSymbolicLink takes the link name first and the target second;
        # flag 1 marks a directory link, 0 a file link.
        flags = 1 if os.path.isdir(source) else 0
        win32file.CreateSymbolicLink(link_name, source, flags)
    except Exception:
        shutil.copy(source, link_name)
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
80
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
81
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def copy_to_working_directory(data_file, relative_path):
    """Copy *data_file* to *relative_path* and return *relative_path*.

    The copy is skipped when both arguments resolve to the same absolute
    path.
    """
    already_in_place = os.path.abspath(data_file) == os.path.abspath(relative_path)
    if not already_in_place:
        shutil.copy(data_file, relative_path)
    return relative_path
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
86
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
87
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def __main__():
    """Script entry point: hand control to ``run_script``."""
    run_script()
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
90
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
91
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
# Driver-file templates, filled in with ``string.Template`` substitution.
#
# Further database attributes: name, databaseAccessionRegEx,
# databaseDescriptionRegEx, decoyProteinRegEx.
# Further export types: protxml, spectrum-report, statistics, peptide-report,
# protein-report, experiment-report.

# Driver for a full run: loads the FASTA database and samples, then exports
# an sf3 file.
RUN_TEMPLATE = """<Scaffold>
<Experiment name="Galaxy Scaffold Experiment">
<FastaDatabase id="database"
path="$database_path"
name="$database_name"
databaseAccessionRegEx="$database_accession_regex"
databaseDescriptionRegEx="$database_description_regex"
decoyProteinRegEx="$database_decoy_regex"
/>
$samples
$display_thresholds
<Export type="sf3" path="$output_path" thresholds="thresh" />
</Experiment>
</Scaffold>
"""

# Driver for an export: loads an existing sf3 file and writes it out with the
# attributes supplied in ``$export_options``.
EXPORT_TEMPLATE = """<Scaffold>
<Experiment load="$sf3_path">
$display_thresholds
<Export $export_options path="$output_path" thresholds="thresh" />
</Experiment>
</Scaffold>
"""
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
117
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def parse_groups(inputs_file, group_parts=("group",), input_parts=("name", "path")):
    """Parse a ``prefix:value`` listing of groups and their inputs.

    Blank lines and lines starting with ``#`` are ignored.  A line starting
    with ``<group_parts[0]>:`` opens a group described by ``len(group_parts)``
    consecutive lines; a line starting with ``<input_parts[0]>:`` opens an
    input record of ``len(input_parts)`` consecutive lines.  Follow-up lines
    of a record are sliced by the length of their expected prefix without
    checking that the prefix is actually present.  Any other line is skipped.

    :param inputs_file: path of the listing to read.
    :param group_parts: field names making up a group header, in file order.
    :param input_parts: field names making up an input record, in file order.
    :returns: dict mapping each group's first-field value to
        ``{"group_data": {field: value}, "inputs": [[value, ...], ...]}``.
        Groups without any input are not included.  Inputs that appear before
        the first group header are stored under the key ``None`` with an
        empty ``group_data``.
    """
    with open(inputs_file, "r") as handle:
        stripped = [line.strip() for line in handle]
    inputs_lines = [line for line in stripped if line and not line.startswith("#")]

    group_prefixes = ["%s:" % group_part for group_part in group_parts]
    input_prefixes = ["%s:" % input_part for input_part in input_parts]

    groups = {}
    cur_group = None
    # Defined up front so an input record preceding any group header does not
    # hit an unbound local.
    group_data = {}
    i = 0
    while i < len(inputs_lines):
        line = inputs_lines[i]
        if line.startswith(group_prefixes[0]):
            # Start a new group.
            cur_group = line[len(group_prefixes[0]):]
            group_data = {}
            for j, group_prefix in enumerate(group_prefixes):
                group_data[group_parts[j]] = inputs_lines[i + j][len(group_prefix):]
            i += len(group_prefixes)
        elif line.startswith(input_prefixes[0]):
            record = [inputs_lines[i + j][len(input_prefix):]
                      for j, input_prefix in enumerate(input_prefixes)]
            if cur_group not in groups:
                groups[cur_group] = {"group_data": group_data, "inputs": []}
            groups[cur_group]["inputs"].append(record)
            i += len(input_prefixes)
        else:
            # Unrecognised line: skip it.
            i += 1
    return groups
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
150
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
151
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def build_samples(samples_file):
    """Build the ``<BiologicalSample>`` XML fragment for a samples listing.

    *samples_file* is parsed with ``parse_groups`` using the group fields
    ``sample``/``mudpit``/``category`` and the input fields
    ``name``/``path``/``ext``.  For every input a link named after the
    input's base name (with ``.<ext>`` appended when missing) is created in
    the current directory via ``symlink``, and its absolute path is emitted
    as an ``<InputFile>`` element.

    :returns: the concatenated XML text for all samples.
    """
    # ``escape`` leaves double quotes alone; attribute values need them
    # replaced as well.
    quote_entities = {'"': "&quot;"}
    group_data = parse_groups(samples_file, group_parts=["sample", "mudpit", "category"], input_parts=["name", "path", "ext"])
    pieces = []
    for sample_name, sample_data in group_data.items():
        sample_fields = sample_data["group_data"]
        pieces.append("""<BiologicalSample database="database" name="%s" mudpit="%s" category="%s">\n""" % (
            escape(sample_name, quote_entities),
            escape(sample_fields["mudpit"], quote_entities),
            escape(sample_fields["category"], quote_entities),
        ))
        for (name, path, ext) in sample_data["inputs"]:
            name = os.path.basename(name)
            # Compare against ".ext" so a name that merely ends in the same
            # letters (e.g. "run_mgf") still gets its extension.
            if not name.lower().endswith(".%s" % ext.lower()):
                name = "%s.%s" % (name, ext)
            symlink(path, name)
            pieces.append("<InputFile>%s</InputFile>\n" % escape(os.path.abspath(name)))
        pieces.append("""</BiologicalSample>\n""")
    return "".join(pieces)
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
168
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
169
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def run_script():
    """Dispatch on the first command-line argument.

    ``run`` invokes ``scaffold_run`` and ``export`` invokes
    ``scaffold_export``.  A missing or unknown action terminates the process
    with a usage message on stderr and a non-zero exit status, instead of
    failing with an ``IndexError`` or an unbound local.
    """
    actions = ("run", "export")
    if len(sys.argv) < 2 or sys.argv[1] not in actions:
        sys.exit("Usage: %s run|export [options]" % os.path.basename(sys.argv[0]))
    if sys.argv[1] == "run":
        scaffold_run()
    else:
        scaffold_export()
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
177
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
178
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def scaffold_export():
    """Handle the ``export`` action.

    Parses ``--sf3``, ``--output``, ``--export_type`` and the threshold
    options from the command line, then runs Scaffold with
    ``EXPORT_TEMPLATE``.
    """
    parser = optparse.OptionParser()
    for flag in ("--sf3", "--output", "--export_type"):
        parser.add_option(flag)
    populate_threshold_options(parser)
    options, _ = parser.parse_args()

    template_parameters = {
        "sf3_path": options.sf3,
        "export_options": """ type="%s" """ % options.export_type,
        "display_thresholds": build_display_thresholds(options),
    }

    execute_scaffold(options, EXPORT_TEMPLATE, template_parameters)
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
194
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
195
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def build_display_thresholds(options):
    """Render the ``<DisplayThresholds id="thresh">`` element for *options*.

    Threshold options left at ``None`` are omitted from the attribute list;
    ``useCharge`` is always present.  The element body is the concatenation
    of ``tandem_opts(options)`` and ``omssa_opts(options)``.
    """
    optional_attributes = (
        ("proteinProbability", options.protein_probability),
        ("peptideProbability", options.peptide_probability),
        ("minimumPeptideCount", options.minimum_peptide_count),
        ("minimumPeptideLength", options.minimum_peptide_length),
        ("minimumNTT", options.minimum_ntt),
    )
    attributes = ['id="thresh"']
    for xml_name, value in optional_attributes:
        if value is not None:
            attributes.append('%s="%s"' % (xml_name, value))
    attributes.append('useCharge="%s"' % build_use_charge_option(options))

    body = tandem_opts(options) + omssa_opts(options)
    return "<DisplayThresholds " + " ".join(attributes) + ">" + body + "</DisplayThresholds>"
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
213
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
214
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def tandem_opts(options):
    """Return a ``<TandemThresholds>`` element for ``options.tandem_score``.

    The score is repeated four times in ``logExpectScores``.  An empty string
    is returned when the score is unset or otherwise falsy.
    """
    score = options.tandem_score
    if not score:
        return ""
    return '<TandemThresholds logExpectScores="%s,%s,%s,%s" />' % (score, score, score, score)
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
221
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
222
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def omssa_opts(options):
    """Return the OMSSA threshold markup, which is always the empty string.

    *options* is accepted for symmetry with ``tandem_opts`` but is not read.
    """
    return ""
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
225
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
226
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def build_use_charge_option(options):
    """Return the ``useCharge`` value: four comma-separated ``true``/``false``.

    Entry *i* reflects ``options.use_charge_<i>`` for charges 1 to 4; an
    attribute missing from *options* counts as true.
    """
    flags = (getattr(options, "use_charge_%s" % charge, True) for charge in "1234")
    return ",".join("true" if flag else "false" for flag in flags)
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
233
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
234
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def populate_threshold_options(option_parser):
    """Register the threshold-related command-line options on *option_parser*.

    Value options default to ``None``.  Each ``--ignore_charge_<n>`` flag
    stores ``False`` into ``use_charge_<n>``, which otherwise defaults to
    ``True``.
    """
    for name in ("protein_probability", "peptide_probability", "minimum_peptide_count"):
        option_parser.add_option("--" + name, default=None)
    for charge in ("1", "2", "3", "4"):
        option_parser.add_option("--ignore_charge_" + charge, action="store_false",
                                 dest="use_charge_" + charge, default=True)
    for name in ("minimum_peptide_length", "minimum_ntt", "tandem_score",
                 "omssa_peptide_probability", "omssa_log_expect_score"):
        option_parser.add_option("--" + name, default=None)
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
248
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
249
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def database_rules(database_type):
    """Return ``(accession_regex, description_regex)`` for *database_type*.

    The lookup is case-sensitive; any type that is not listed falls back to
    the ``GENERIC`` pair.
    """
    generic = (">([^ ]*)", ">[^ ]* (.*)")
    known_rules = {
        "ESTNR": (">(gi\\|[0-9]*)", ">[^ ]* (.*)"),
        "IPI": (">IPI:([^\\| .]*)", ">[^ ]* Tax_Id=[0-9]* (.*)"),
        "SWISSPROT": (">([^ ]*)", ">[^ ]* \\([^ ]*\\) (.*)"),
        "UNIPROT": (">[^ ]*\\|([^ ]*)", ">[^ ]*\\|[^ ]* (.*)"),
        "UNIREF": (">UniRef100_([^ ]*)", ">[^ ]* (.*)"),
        "ENSEMBL": (">(ENS[^ ]*)", ">[^ ]* (.*)"),
        "MSDB": (">([^ ]*)", ">[^ ]* (.*)"),
        "GENERIC": generic,
    }
    return known_rules.get(database_type, generic)
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
263
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
264
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def scaffold_run():
    """Handle the ``run`` action.

    Parses the sample, database, output and threshold options from the
    command line, builds the ``RUN_TEMPLATE`` parameters and runs Scaffold.
    When ``--output_driver`` is given, the generated ``driver.xml`` is copied
    there afterwards.

    ``--database_decoy_regex`` is mandatory: without it the XML escaping
    below would fail on ``None``, so the parser reports the problem instead.
    """
    parser = optparse.OptionParser()
    for flag in ("--samples", "--database", "--database_name", "--database_type",
                 "--database_decoy_regex", "--output", "--output_driver"):
        parser.add_option(flag)
    populate_threshold_options(parser)
    (options, args) = parser.parse_args()

    if options.database_decoy_regex is None:
        parser.error("--database_decoy_regex is required")

    # ``escape`` leaves double quotes alone; attribute values need them
    # replaced as well.
    quote_entities = {'"': "&quot;"}

    def attribute_text(value):
        # Unset options pass through untouched and are rendered by the
        # template as before.
        return value if value is None else escape(value, quote_entities)

    template_parameters = {}

    # Read samples from config file and convert to XML
    template_parameters["samples"] = build_samples(options.samples)
    template_parameters["display_thresholds"] = build_display_thresholds(options)

    # Setup database parameters
    (accession_regex, description_regex) = database_rules(options.database_type)

    template_parameters["database_path"] = attribute_text(options.database)
    template_parameters["database_name"] = attribute_text(options.database_name)
    template_parameters["database_accession_regex"] = attribute_text(accession_regex)
    template_parameters["database_description_regex"] = attribute_text(description_regex)
    template_parameters["database_decoy_regex"] = attribute_text(options.database_decoy_regex)

    execute_scaffold(options, RUN_TEMPLATE, template_parameters)

    if options.output_driver:
        shutil.copy("driver.xml", options.output_driver)
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
301
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
302
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
def execute_scaffold(options, template, template_parameters):
    """Write the Scaffold driver file and run ``ScaffoldBatch3`` on it.

    ``options.output`` is stored in *template_parameters* as ``output_path``
    (the dict is modified in place), *template* is filled in with
    ``string.Template`` substitution, and the result is printed and written
    to ``driver.xml`` in the current directory before Scaffold is launched.

    :raises KeyError: if *template* references a placeholder that is missing
        from *template_parameters*.
    """
    # Setup output parameter
    template_parameters["output_path"] = options.output

    # Prepare and create driver file
    driver_contents = Template(template).substitute(template_parameters)
    print(driver_contents)
    driver_path = os.path.abspath("driver.xml")
    # Close the file before launching Scaffold so the driver is fully
    # flushed to disk when the child process reads it.
    with open(driver_path, "w") as driver_file:
        driver_file.write(driver_contents)

    # Run Scaffold
    execute("ScaffoldBatch3 '%s'" % driver_path)
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
316
e9981e6af666 Improved some datatype handling
galaxyp
parents:
diff changeset
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()