comparison tools/effectiveT3/effectiveT3.py @ 8:60a9b3f760cc draft

v0.0.17 Used cached URL, python style updates
author peterjc
date Wed, 01 Feb 2017 09:22:21 -0500
parents 5f85301d50bf
children 512530020360
comparison
equal deleted inserted replaced
7:5f85301d50bf 8:60a9b3f760cc
14 import sys 14 import sys
15 import os 15 import os
16 import subprocess 16 import subprocess
17 17
# Galaxy's auto-install (tool_dependencies.xml) exports EFFECTIVET3; when the
# variable is absent we fall back to /opt/EffectiveT3/.
effective_t3_dir = os.environ.get("EFFECTIVET3", "/opt/EffectiveT3/")
effective_t3_jar = os.path.join(effective_t3_dir, "TTSS_GUI-1.0.1.jar")

# A version request is answered before any other argument checking happens.
if any(flag in sys.argv for flag in ("-v", "--version")):
    # TODO - Get version of the JAR file dynamically?
    print("Wrapper v0.0.17, TTSS_GUI-1.0.1.jar")
    sys.exit(0)

# sys.argv holds the script name plus exactly four user arguments.
if len(sys.argv) != 5:
    sys.exit("Require four arguments: model, threshold, input protein FASTA file & output tabular file")

model, threshold, fasta_file, tabular_file = sys.argv[1:]

if not os.path.isfile(fasta_file):
    sys.exit("Input FASTA file not found: %s" % fasta_file)

# The threshold is either one of the two named presets or a "cutoff=..." value.
threshold_ok = threshold in ["selective", "sensitive"] or threshold.startswith("cutoff=")
if not threshold_ok:
    sys.exit("Threshold should be selective, sensitive, or cutoff=..., not %r" % threshold)
38
43 39
def clean_tabular(raw_handle, out_handle):
    """Clean up Effective T3 output to make it tabular.

    Reads the semi-colon separated lines from ``raw_handle`` and writes one
    tab separated row (ID, description, score, effective flag) per record
    to ``out_handle``. Blank lines, ``#`` comment lines and the
    ``Id; Description; Score;`` header line are skipped.

    Returns a tuple ``(count, positive, errors)``: the number of records
    written, how many had an effective flag of "true" (case-insensitive),
    and how many had a negative score.

    Exits via ``sys.exit`` with a "Problem parsing line" message if a record
    cannot be split into its fields or its score is not a number.
    """
    count = 0
    positive = 0
    errors = 0
    for line in raw_handle:
        # Lines read from a handle keep their newline, so test the stripped
        # text in order to actually skip blank lines.
        if not line.strip() or line.startswith("#") \
                or line.startswith("Id; Description; Score;"):
            continue
        # Normally there will just be three semi-colons, however the
        # original FASTA file's ID or description might have had
        # semi-colons in it as well, hence splitting from the right.
        # Any line with too few fields ends up raising ValueError below.
        try:
            id_descr, score, effective = line.rstrip("\r\n").rsplit(";", 2)
            # Cope when there was no FASTA description
            if "; " not in id_descr and id_descr.endswith(";"):
                seq_id = id_descr[:-1]
                descr = ""
            else:
                seq_id, descr = id_descr.split("; ", 1)
            # Validate the score before anything is written for this record.
            score_value = float(score)
        except ValueError:
            sys.exit("Problem parsing line:\n%s\n" % line)
        parts = [s.strip() for s in [seq_id, descr, score, effective]]
        out_handle.write("\t".join(parts) + "\n")
        count += 1
        if score_value < 0:
            errors += 1
        # Compare the stripped flag: the raw field may still carry the
        # space that follows the separating semi-colon.
        if parts[3].lower() == "true":
            positive += 1
    return count, positive, errors
71
75 72
def _as_text(data):
    """Return captured subprocess output as text, decoding bytes leniently."""
    if isinstance(data, bytes) and not isinstance(data, str):
        # Python 3 pipes yield bytes. Undecodable bytes are replaced rather
        # than raising, as the text is only used for error reporting.
        return data.decode("utf-8", "replace")
    return data


def run(cmd):
    """Run the command (a list of argument strings) and wait for it.

    Returns None when the command finishes with a zero return code and its
    stderr does not start with "Exception in thread". Otherwise, or if the
    command could not be started at all, exits via ``sys.exit`` with a
    message containing the command line and the captured output.
    """
    # Avoid using shell=True when we call subprocess to ensure if the Python
    # script is killed, so too is the child process.
    try:
        child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except Exception as err:
        sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
    # Use .communicate as can get deadlocks with .wait()
    stdout, stderr = child.communicate()
    stdout = _as_text(stdout)
    stderr = _as_text(stderr)
    return_code = child.returncode
    # A leading "Exception in thread" on stderr is treated as a failure even
    # when the return code is zero.
    if return_code or stderr.startswith("Exception in thread"):
        cmd_str = " ".join(cmd)  # doesn't quote spaces etc
        if stderr and stdout:
            sys.exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr))
        else:
            sys.exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr))
92 89
93 90
# Check the Effective T3 installation before trying to run anything.
if not os.path.isdir(effective_t3_dir):
    sys.exit("Effective T3 folder not found: %r" % effective_t3_dir)

if not os.path.isfile(effective_t3_jar):
    sys.exit("Effective T3 JAR file not found: %r" % effective_t3_jar)

module_dir = os.path.join(effective_t3_dir, "module")
if not os.path.isdir(module_dir):
    sys.exit("Effective T3 module folder not found: %r" % module_dir)

effective_t3_model = os.path.join(module_dir, model)
if not os.path.isfile(effective_t3_model):
    # List what is present in the module folder to help diagnose the problem.
    sys.stderr.write("Contents of %r is %s\n"
                     % (module_dir,
                        ", ".join(repr(p) for p in os.listdir(module_dir))))
    sys.stderr.write("Main JAR was found: %r\n" % effective_t3_jar)
    sys.exit("Effective T3 model JAR file not found: %r" % effective_t3_model)

# We will have write access wherever the output should be,
# so the raw Effective T3 output goes next to the final tabular file.
temp_file = os.path.abspath(tabular_file + ".tmp")

# Use absolute paths since we will change the current directory...
tabular_file = os.path.abspath(tabular_file)
fasta_file = os.path.abspath(fasta_file)

cmd = ["java", "-jar", effective_t3_jar,
       "-f", fasta_file,
       "-m", model,
       "-t", threshold,
       "-o", temp_file,
       "-q"]

try:
    # Must run from directory above the module subfolder:
    os.chdir(effective_t3_dir)
except OSError:
    sys.exit("Could not change to Effective T3 folder: %s" % effective_t3_dir)

try:
    run(cmd)

    if not os.path.isfile(temp_file):
        sys.exit("ERROR - No output file from Effective T3")

    # The context managers close both handles even if parsing exits early.
    with open(tabular_file, "w") as out_handle:
        out_handle.write("#ID\tDescription\tScore\tEffective\n")
        with open(temp_file) as data_handle:
            count, positive, errors = clean_tabular(data_handle, out_handle)
finally:
    # Never leave the intermediate file behind, whether or not the run
    # and the clean-up succeeded.
    if os.path.isfile(temp_file):
        os.remove(temp_file)

if errors:
    print("%i sequences, %i positive, %i errors"
          % (count, positive, errors))
else:
    print("%i/%i sequences positive" % (positive, count))

if count and count == errors:
    # Galaxy will still allow them to see the output file
    sys.exit("All your sequences gave an error code")