Mercurial > repos > peterjc > effectivet3
annotate tools/protein_analysis/effectiveT3.py @ 1:e607c342312f
Wrapper v0.0.8, includes effectiveT3.loc.sample file
author | peterjc |
---|---|
date | Tue, 02 Aug 2011 07:06:13 -0400 |
parents | 43436379876f |
children |
rev | line source |
---|---|
0
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
2 """Wrapper for EffectiveT3 v1.0.1 for use in Galaxy. |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
3 |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
4 This script takes exactly four command line arguments: |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
5 * model name (e.g. TTSS_STD-1.0.1.jar) |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
6 * threshold (selective or sensitive) |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
7 * an input protein FASTA filename |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
8 * output tabular filename |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
9 |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
10 It then calls the standalone Effective T3 v1.0.1 program (not the |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
11 webservice), and reformats the semi-colon separated output into |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
12 tab separated output for use in Galaxy. |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
13 """ |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
14 import sys |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
15 import os |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
16 import subprocess |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
17 |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
18 #You may need to edit this path to match your local Effective T3 setup: |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
19 effectiveT3_jar = "/opt/EffectiveT3/TTSS_GUI-1.0.1.jar" |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
20 |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
21 |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
def stop_err(msg, error_level=1):
    """Write msg followed by a newline to stderr, then exit the script.

    The process exit status is error_level (default 1).
    """
    text = "%s\n" % msg
    sys.stderr.write(text)
    # Equivalent to sys.exit(error_level), which raises SystemExit.
    raise SystemExit(error_level)
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
26 |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
#Everything after the script name: model, threshold, FASTA input, tabular output.
arguments = sys.argv[1:]
if len(arguments) != 4:
    stop_err("Require four arguments: model, threshold, input protein FASTA file & output tabular file")

model, threshold, fasta_file, tabular_file = arguments

if not os.path.isfile(fasta_file):
    stop_err("Input FASTA file not found: %s" % fasta_file)

#The threshold is either one of the two named presets or a cutoff=... value.
if not (threshold in ("selective", "sensitive")
        or threshold.startswith("cutoff=")):
    stop_err("Threshold should be selective, sensitive, or cutoff=..., not %r" % threshold)
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
38 |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
def clean_tabular(raw_handle, out_handle):
    """Clean up Effective T3 output to make it tabular.

    Reads semi-colon separated lines from raw_handle and writes one tab
    separated line (ID, description, score, effective) per record to
    out_handle. Blank lines, lines starting with "#", and the header line
    starting "Id; Description; Score;" are skipped.

    Returns a tuple (count, positive, errors): the number of records
    written, how many had an effective value of "true" (any case), and how
    many had a negative score.

    Calls stop_err (which exits the script) if a line cannot be parsed.
    """
    count = 0
    positive = 0
    errors = 0
    for line in raw_handle:
        #Lines read from a handle keep their newline, so test the stripped
        #text in order to really skip blank lines.
        if not line.strip() or line.startswith("#") \
        or line.startswith("Id; Description; Score;"):
            continue
        #Explicit check rather than an assert, which python -O would remove.
        if line.count(";") < 3:
            stop_err("Problem parsing line:\n%s\n" % line)
        #Normally there will just be three semi-colons, however the
        #original FASTA file's ID or description might have had
        #semi-colons in it as well, hence splitting from the right:
        try:
            id_descr, score, effective = line.rstrip("\r\n").rsplit(";", 2)
            #Cope when there was no FASTA description
            if "; " not in id_descr and id_descr.endswith(";"):
                seq_id = id_descr[:-1]
                descr = ""
            else:
                seq_id, descr = id_descr.split("; ", 1)
        except ValueError:
            stop_err("Problem parsing line:\n%s\n" % line)
        parts = [s.strip() for s in [seq_id, descr, score, effective]]
        out_handle.write("\t".join(parts) + "\n")
        count += 1
        if float(parts[2]) < 0:
            errors += 1
        #Compare the stripped field: the raw one keeps the space which
        #follows the semi-colon, and so would never equal "true".
        if parts[3].lower() == "true":
            positive += 1
    return count, positive, errors
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
70 |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
def run(cmd):
    """Run an external command, stopping the script if it fails.

    cmd is a list of strings (program name followed by its arguments)
    passed to subprocess.Popen without a shell. The child's stdout and
    stderr are captured. Nothing is returned on success.

    Calls stop_err (which exits the script) if the command cannot be
    started, or if it finishes with a non-zero return code; the message
    includes the command line and the captured output.
    """
    #Avoid using shell=True when we call subprocess to ensure if the Python
    #script is killed, so too is the child process.
    try:
        child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except Exception as err:
        stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
    #Use .communicate as can get deadlocks with .wait(),
    stdout, stderr = child.communicate()
    return_code = child.returncode
    if return_code:
        #Report the command itself; the exception variable from the except
        #clause above is not defined when the process started normally.
        cmd_str = " ".join(cmd)
        if stderr and stdout:
            stop_err("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr))
        else:
            stop_err("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr))
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
86 |
43436379876f
Migrated tool version 0.0.7 from old tool shed archive to new tool shed repository
peterjc
parents:
diff
changeset
|
#Check the Effective T3 installation before doing any work.
if not os.path.isfile(effectiveT3_jar):
    stop_err("Effective T3 JAR file not found: %s" % effectiveT3_jar)

effectiveT3_dir = os.path.dirname(effectiveT3_jar)
if not os.path.isdir(effectiveT3_dir):
    stop_err("Effective T3 folder not found: %s" % effectiveT3_dir)

effectiveT3_model = os.path.join(effectiveT3_dir, "module", model)
if not os.path.isfile(effectiveT3_model):
    stop_err("Effective T3 model JAR file not found: %s" % effectiveT3_model)

#We will have write access wherever the output should be, so the raw
#Effective T3 output is written next to the final tabular file.
temp_file = os.path.abspath(tabular_file + ".tmp")

#Use absolute paths since will change current directory...
tabular_file = os.path.abspath(tabular_file)
fasta_file = os.path.abspath(fasta_file)

cmd = ["java", "-jar", effectiveT3_jar,
       "-f", fasta_file,
       "-m", model,
       "-t", threshold,
       "-o", temp_file,
       "-q"]

try:
    #Must run from directory above the module subfolder:
    os.chdir(effectiveT3_dir)
except OSError:
    #Only catch the chdir failure itself, not KeyboardInterrupt/SystemExit.
    stop_err("Could not change to Effective T3 folder: %s" % effectiveT3_dir)

run(cmd)

if not os.path.isfile(temp_file):
    stop_err("ERROR - No output file from Effective T3")

#Convert the raw output to tabular. The with blocks close both handles and
#the finally clause removes the temp file even if parsing stops the script.
try:
    with open(tabular_file, "w") as out_handle:
        out_handle.write("#ID\tDescription\tScore\tEffective\n")
        with open(temp_file) as data_handle:
            count, positive, errors = clean_tabular(data_handle, out_handle)
finally:
    os.remove(temp_file)

#Summary for the user; a single parenthesised argument prints the same
#text under both the Python 2 print statement and the print function.
if errors:
    print("%i sequences, %i positive, %i errors"
          % (count, positive, errors))
else:
    print("%i/%i sequences positive" % (positive, count))

if count and count == errors:
    #Galaxy will still allow them to see the output file
    stop_err("All your sequences gave an error code")