comparison tools/effectiveT3/effectiveT3.py @ 10:a46d7861c32c draft

"Update all the pico_galaxy tools on main Tool Shed"
author peterjc
date Fri, 16 Apr 2021 22:34:56 +0000
parents 512530020360
children ed8c1babc166
comparison
equal deleted inserted replaced
9:512530020360 10:a46d7861c32c
10 It then calls the standalone Effective T3 v1.0.1 program (not the 10 It then calls the standalone Effective T3 v1.0.1 program (not the
11 webservice), and reformats the semi-colon separated output into 11 webservice), and reformats the semi-colon separated output into
12 tab separated output for use in Galaxy. 12 tab separated output for use in Galaxy.
13 """ 13 """
14 import os 14 import os
15
16 # We want to be able to use shutil.which, but need Python 3.3+
17 # import shutil
15 import subprocess 18 import subprocess
16 import sys 19 import sys
17 20
18 # The Galaxy auto-install via tool_dependencies.xml will set this environment variable 21 # The Galaxy auto-install via tool_dependencies.xml will set the
19 effective_t3_dir = os.environ.get("EFFECTIVET3", "/opt/EffectiveT3/") 22 # environment variable $EFFECTIVET3 pointing at the folder with
20 effective_t3_jar = os.path.join(effective_t3_dir, "TTSS_GUI-1.0.1.jar") 23 # the JAR file.
24 #
25 # The BioConda recipe will put a wrapper script on the $PATH,
26 # which we can use to find the JAR file.
27 #
28 # We fall back on /opt/EffectiveT3/
29 #
30 effective_t3_jarname = "TTSS_GUI-1.0.1.jar"
21 31
22 if "-v" in sys.argv or "--version" in sys.argv: 32 if "-v" in sys.argv or "--version" in sys.argv:
23 # TODO - Get version of the JAR file dynamically? 33 # TODO - Get version of the JAR file dynamically?
24 print("Wrapper v0.0.17, TTSS_GUI-1.0.1.jar") 34 print("Wrapper v0.0.20, for %s" % effective_t3_jarname)
25 sys.exit(0) 35 sys.exit(0)
26 36
27 if len(sys.argv) != 5: 37 if len(sys.argv) != 5:
28 sys.exit("Require four arguments: model, threshold, input protein FASTA file & output tabular file") 38 sys.exit(
39 "Require four arguments: model, threshold, input protein "
40 "FASTA file & output tabular file"
41 )
29 42
30 model, threshold, fasta_file, tabular_file = sys.argv[1:] 43 model, threshold, fasta_file, tabular_file = sys.argv[1:]
31 44
32 if not os.path.isfile(fasta_file): 45 if not os.path.isfile(fasta_file):
33 sys.exit("Input FASTA file not found: %s" % fasta_file) 46 sys.exit("Input FASTA file not found: %s" % fasta_file)
34 47
35 if threshold not in ["selective", "sensitive"] and not threshold.startswith("cutoff="): 48 if threshold not in ["selective", "sensitive"] and not threshold.startswith("cutoff="):
36 sys.exit("Threshold should be selective, sensitive, or cutoff=..., not %r" % threshold) 49 sys.exit(
50 "Threshold should be selective, sensitive, or cutoff=..., not %r" % threshold
51 )
37 52
38 53
39 def clean_tabular(raw_handle, out_handle): 54 def clean_tabular(raw_handle, out_handle):
40 """Clean up Effective T3 output to make it tabular.""" 55 """Clean up Effective T3 output to make it tabular."""
41 count = 0 56 count = 0
42 positive = 0 57 positive = 0
43 errors = 0 58 errors = 0
44 for line in raw_handle: 59 for line in raw_handle:
45 if not line or line.startswith("#") or line.startswith("Id; Description; Score;"): 60 if (
61 not line
62 or line.startswith("#")
63 or line.startswith("Id; Description; Score;")
64 ):
46 continue 65 continue
47 assert line.count(";") >= 3, repr(line) 66 assert line.count(";") >= 3, repr(line)
48 # Normally there will just be three semi-colons, however the 67 # Normally there will just be three semi-colons, however the
49 # original FASTA file's ID or description might have had 68 # original FASTA file's ID or description might have had
50 # semi-colons in it as well, hence the following hackery: 69 # semi-colons in it as well, hence the following hackery:
71 def run(cmd): 90 def run(cmd):
72 """Run the command line string via subprocess.""" 91 """Run the command line string via subprocess."""
73 # Avoid shell=True when calling subprocess, so that if this Python 92 # Avoid shell=True when calling subprocess, so that if this Python
74 # script is killed, the child process is killed too. 93 # script is killed, the child process is killed too.
75 try: 94 try:
76 child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 95 child = subprocess.Popen(
77 except Exception, err: 96 cmd, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
97 )
98 except Exception as err:
78 sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) 99 sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
79 # Use .communicate() because .wait() can deadlock when the pipes fill. 100 # Use .communicate() because .wait() can deadlock when the pipes fill.
80 stdout, stderr = child.communicate() 101 stdout, stderr = child.communicate()
81 return_code = child.returncode 102 return_code = child.returncode
82 if return_code or stderr.startswith("Exception in thread"): 103 if return_code or stderr.startswith("Exception in thread"):
83 cmd_str = " ".join(cmd) # doesn't quote spaces etc 104 cmd_str = " ".join(cmd) # doesn't quote spaces etc
84 if stderr and stdout: 105 if stderr and stdout:
85 sys.exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr)) 106 sys.exit(
107 "Return code %i from command:\n%s\n\n%s\n\n%s"
108 % (return_code, cmd_str, stdout, stderr)
109 )
86 else: 110 else:
87 sys.exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)) 111 sys.exit(
88 112 "Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)
89 113 )
90 if not os.path.isdir(effective_t3_dir): 114
91 sys.exit("Effective T3 folder not found: %r" % effective_t3_dir) 115
92 116 try:
93 if not os.path.isfile(effective_t3_jar): 117 from shutil import which
94 sys.exit("Effective T3 JAR file not found: %r" % effective_t3_jar) 118 except ImportError:
119 # Likely running on Python 2, use backport:
120 def which(cmd, mode=os.F_OK | os.X_OK, path=None):
121 """Python implementation of command line tool which.
122
123 Given a command, mode, and a PATH string, return the path which
124 conforms to the given mode on the PATH, or None if there is no such
125 file.
126
127 `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
128 of os.environ.get("PATH"), or can be overridden with a custom search
129 path.
130 """
131 # Check that a given file can be accessed with the correct mode.
132 # Additionally check that `file` is not a directory, as on Windows
133 # directories pass the os.access check.
134 def _access_check(fn, mode):
135 return os.path.exists(fn) and os.access(fn, mode) and not os.path.isdir(fn)
136
137 # Short circuit. If we're given a full path which matches the mode
138 # and it exists, we're done here.
139 if _access_check(cmd, mode):
140 return cmd
141
142 path = (path or os.environ.get("PATH", os.defpath)).split(os.pathsep)
143
144 if sys.platform == "win32":
145 # The current directory takes precedence on Windows.
146 if os.curdir not in path:
147 path.insert(0, os.curdir)
148
149 # PATHEXT is necessary to check on Windows.
150 pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
151 # See if the given file matches any of the expected path extensions.
152 # This will allow us to short circuit when given "python.exe".
153 matches = [cmd for ext in pathext if cmd.lower().endswith(ext.lower())]
154 # If it does match, only test that one, otherwise we have to try
155 # others.
156 files = [cmd] if matches else [cmd + ext.lower() for ext in pathext]
157 else:
158 # On other platforms you don't have things like PATHEXT to tell you
159 # what file suffixes are executable, so just pass on cmd as-is.
160 files = [cmd]
161
162 seen = set()
163 for dir in path:
164 dir = os.path.normcase(dir)
165 if dir not in seen:
166 seen.add(dir)
167 for thefile in files:
168 name = os.path.join(dir, thefile)
169 if _access_check(name, mode):
170 return name
171 return None
172
173
174 # Try in order the following to find the JAR file:
175 # - Location of any wrapper script, e.g. from BioConda installation
176 # - The $EFFECTIVET3 env var, e.g. old-style Galaxy tool installation
177 # - The /opt/EffectiveT3/ folder.
178 effective_t3_jar = None
179 effective_t3_dir = None
180 dirs = ["/opt/EffectiveT3/"]
181 if "EFFECTIVET3" in os.environ:
182 dirs.insert(0, os.environ.get("EFFECTIVET3"))
183 if which("effectivet3"):
184 # Assuming this is a BioConda installed wrapper for effective T3,
185 # this will get the directory of the wrapper script which is where
186 # the JAR file will be:
187 dirs.insert(0, os.path.split(os.path.realpath(which("effectivet3")))[0])
188 for effective_t3_dir in dirs:
189 effective_t3_jar = os.path.join(effective_t3_dir, effective_t3_jarname)
190 if os.path.isfile(effective_t3_jar):
191 # Good
192 break
193 effective_t3_jar = None
194 if not effective_t3_dir or not effective_t3_jar:
195 sys.exit("Effective T3 JAR file %r not found in %r" % (effective_t3_jarname, dirs))
95 196
96 if not os.path.isdir(os.path.join(effective_t3_dir, "module")): 197 if not os.path.isdir(os.path.join(effective_t3_dir, "module")):
97 sys.exit("Effective T3 module folder not found: %r" % os.path.join(effective_t3_dir, "module")) 198 sys.exit(
199 "Effective T3 module folder not found: %r"
200 % os.path.join(effective_t3_dir, "module")
201 )
98 202
99 effective_t3_model = os.path.join(effective_t3_dir, "module", model) 203 effective_t3_model = os.path.join(effective_t3_dir, "module", model)
100 if not os.path.isfile(effective_t3_model): 204 if not os.path.isfile(effective_t3_model):
101 sys.stderr.write("Contents of %r is %s\n" 205 sys.stderr.write(
102 % (os.path.join(effective_t3_dir, "module"), 206 "Contents of %r is %s\n"
103 ", ".join(repr(p) for p in os.listdir(os.path.join(effective_t3_dir, "module"))))) 207 % (
208 os.path.join(effective_t3_dir, "module"),
209 ", ".join(
210 repr(p) for p in os.listdir(os.path.join(effective_t3_dir, "module"))
211 ),
212 )
213 )
104 sys.stderr.write("Main JAR was found: %r\n" % effective_t3_jar) 214 sys.stderr.write("Main JAR was found: %r\n" % effective_t3_jar)
105 sys.exit("Effective T3 model JAR file not found: %r" % effective_t3_model) 215 sys.exit("Effective T3 model JAR file not found: %r" % effective_t3_model)
106 216
107 # We will have write access whereever the output should be, 217 # We will have write access wherever the output should be,
108 temp_file = os.path.abspath(tabular_file + ".tmp") 218 if tabular_file == "/dev/stdout":
219 temp_file = os.path.abspath("effectivet3_tabular_output.tmp")
220 else:
221 temp_file = os.path.abspath(tabular_file + ".tmp")
109 222
110 # Use absolute paths since will change current directory... 223 # Use absolute paths since will change current directory...
111 tabular_file = os.path.abspath(tabular_file) 224 tabular_file = os.path.abspath(tabular_file)
112 fasta_file = os.path.abspath(fasta_file) 225 fasta_file = os.path.abspath(fasta_file)
113 226
114 cmd = ["java", "-jar", effective_t3_jar, 227 cmd = [
115 "-f", fasta_file, 228 "java",
116 "-m", model, 229 "-jar",
117 "-t", threshold, 230 effective_t3_jar,
118 "-o", temp_file, 231 "-f",
119 "-q"] 232 fasta_file,
233 "-m",
234 model,
235 "-t",
236 threshold,
237 "-o",
238 temp_file,
239 "-q",
240 ]
120 241
121 try: 242 try:
122 # Must run from directory above the module subfolder: 243 # Must run from directory above the module subfolder:
123 os.chdir(effective_t3_dir) 244 os.chdir(effective_t3_dir)
124 except Exception: 245 except Exception:
137 out_handle.close() 258 out_handle.close()
138 259
139 os.remove(temp_file) 260 os.remove(temp_file)
140 261
141 if errors: 262 if errors:
142 print("%i sequences, %i positive, %i errors" 263 print("%i sequences, %i positive, %i errors" % (count, positive, errors))
143 % (count, positive, errors))
144 else: 264 else:
145 print("%i/%i sequences positive" % (positive, count)) 265 print("%i/%i sequences positive" % (positive, count))
146 266
147 if count and count == errors: 267 if count and count == errors:
148 # Galaxy will still allow them to see the output file 268 # Galaxy will still allow them to see the output file