Mercurial > repos > peterjc > effectivet3
diff tools/effectiveT3/effectiveT3.py @ 10:a46d7861c32c draft
"Update all the pico_galaxy tools on main Tool Shed"
author | peterjc |
---|---|
date | Fri, 16 Apr 2021 22:34:56 +0000 |
parents | 512530020360 |
children | ed8c1babc166 |
line wrap: on
line diff
--- a/tools/effectiveT3/effectiveT3.py Tue May 16 09:17:17 2017 -0400 +++ b/tools/effectiveT3/effectiveT3.py Fri Apr 16 22:34:56 2021 +0000 @@ -12,20 +12,33 @@ tab separated output for use in Galaxy. """ import os + +# We want to be able to use shutil.which, but need Python 3.3+ +# import shutil import subprocess import sys -# The Galaxy auto-install via tool_dependencies.xml will set this environment variable -effective_t3_dir = os.environ.get("EFFECTIVET3", "/opt/EffectiveT3/") -effective_t3_jar = os.path.join(effective_t3_dir, "TTSS_GUI-1.0.1.jar") +# The Galaxy auto-install via tool_dependencies.xml will set the +# environment variable $EFFECTIVET3 pointing at the folder with +# the JAR file. +# +# The BioConda recipe will put a wrapper script on the $PATH, +# which we can use to find the JAR file. +# +# We fall back on /opt/EffectiveT3/ +# +effective_t3_jarname = "TTSS_GUI-1.0.1.jar" if "-v" in sys.argv or "--version" in sys.argv: # TODO - Get version of the JAR file dynamically? - print("Wrapper v0.0.17, TTSS_GUI-1.0.1.jar") + print("Wrapper v0.0.20, for %s" % effective_t3_jarname) sys.exit(0) if len(sys.argv) != 5: - sys.exit("Require four arguments: model, threshold, input protein FASTA file & output tabular file") + sys.exit( + "Require four arguments: model, threshold, input protein " + "FASTA file & output tabular file" + ) model, threshold, fasta_file, tabular_file = sys.argv[1:] @@ -33,7 +46,9 @@ sys.exit("Input FASTA file not found: %s" % fasta_file) if threshold not in ["selective", "sensitive"] and not threshold.startswith("cutoff="): - sys.exit("Threshold should be selective, sensitive, or cutoff=..., not %r" % threshold) + sys.exit( + "Threshold should be selective, sensitive, or cutoff=..., not %r" % threshold + ) def clean_tabular(raw_handle, out_handle): @@ -42,7 +57,11 @@ positive = 0 errors = 0 for line in raw_handle: - if not line or line.startswith("#") or line.startswith("Id; Description; Score;"): + if ( + not line + or line.startswith("#") + or line.startswith("Id; Description; Score;") + ): continue assert line.count(";") >= 3, repr(line) # Normally there will just be three semi-colons, however the @@ -73,8 +92,10 @@ # Avoid using shell=True when we call subprocess to ensure if the Python # script is killed, so too is the child process. try: - child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - except Exception, err: + child = subprocess.Popen( + cmd, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + except Exception as err: sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) # Use .communicate as can get deadlocks with .wait(), stdout, stderr = child.communicate() @@ -82,41 +103,141 @@ if return_code or stderr.startswith("Exception in thread"): cmd_str = " ".join(cmd) # doesn't quote spaces etc if stderr and stdout: - sys.exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr)) + sys.exit( + "Return code %i from command:\n%s\n\n%s\n\n%s" + % (return_code, cmd_str, stdout, stderr) + ) else: - sys.exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)) + sys.exit( + "Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr) + ) -if not os.path.isdir(effective_t3_dir): - sys.exit("Effective T3 folder not found: %r" % effective_t3_dir) +try: + from shutil import which +except ImportError: + # Likely running on Python 2, use backport: + def which(cmd, mode=os.F_OK | os.X_OK, path=None): + """Python implementation of command line tool which. + + Given a command, mode, and a PATH string, return the path which + conforms to the given mode on the PATH, or None if there is no such + file. + + `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result + of os.environ.get("PATH"), or can be overridden with a custom search + path. + """ + # Check that a given file can be accessed with the correct mode. + # Additionally check that `file` is not a directory, as on Windows + # directories pass the os.access check. + def _access_check(fn, mode): + return os.path.exists(fn) and os.access(fn, mode) and not os.path.isdir(fn) + + # Short circuit. If we're given a full path which matches the mode + # and it exists, we're done here. + if _access_check(cmd, mode): + return cmd + + path = (path or os.environ.get("PATH", os.defpath)).split(os.pathsep) + + if sys.platform == "win32": + # The current directory takes precedence on Windows. + if os.curdir not in path: + path.insert(0, os.curdir) -if not os.path.isfile(effective_t3_jar): - sys.exit("Effective T3 JAR file not found: %r" % effective_t3_jar) + # PATHEXT is necessary to check on Windows. + pathext = os.environ.get("PATHEXT", "").split(os.pathsep) + # See if the given file matches any of the expected path extensions. + # This will allow us to short circuit when given "python.exe". + matches = [cmd for ext in pathext if cmd.lower().endswith(ext.lower())] + # If it does match, only test that one, otherwise we have to try + # others. + files = [cmd] if matches else [cmd + ext.lower() for ext in pathext] + else: + # On other platforms you don't have things like PATHEXT to tell you + # what file suffixes are executable, so just pass on cmd as-is. + files = [cmd] + + seen = set() + for dir in path: + dir = os.path.normcase(dir) + if dir not in seen: + seen.add(dir) + for thefile in files: + name = os.path.join(dir, thefile) + if _access_check(name, mode): + return name + return None + + +# Try in order the following to find the JAR file: +# - Location of any wrapper script, e.g. from BioConda installation +# - The $EFFECTIVET3 env var, e.g. old-style Galaxy tool installation +# - The /opt/EffectiveT3/ folder. +effective_t3_jar = None +effective_t3_dir = None +dirs = ["/opt/EffectiveT3/"] +if "EFFECTIVET3" in os.environ: + dirs.insert(0, os.environ.get("EFFECTIVET3")) +if which("effectivet3"): + # Assuming this is a BioConda installed wrapper for effective T3, + # this will get the directory of the wrapper script which is where + # the JAR file will be: + dirs.insert(0, os.path.split(os.path.realpath(which("effectivet3")))[0]) +for effective_t3_dir in dirs: + effective_t3_jar = os.path.join(effective_t3_dir, effective_t3_jarname) + if os.path.isfile(effective_t3_jar): + # Good + break + effective_t3_jar = None +if not effective_t3_dir or not effective_t3_jar: + sys.exit("Effective T3 JAR file %r not found in %r" % (effective_t3_jarname, dirs)) if not os.path.isdir(os.path.join(effective_t3_dir, "module")): - sys.exit("Effective T3 module folder not found: %r" % os.path.join(effective_t3_dir, "module")) + sys.exit( + "Effective T3 module folder not found: %r" + % os.path.join(effective_t3_dir, "module") + ) effective_t3_model = os.path.join(effective_t3_dir, "module", model) if not os.path.isfile(effective_t3_model): - sys.stderr.write("Contents of %r is %s\n" - % (os.path.join(effective_t3_dir, "module"), - ", ".join(repr(p) for p in os.listdir(os.path.join(effective_t3_dir, "module"))))) + sys.stderr.write( + "Contents of %r is %s\n" + % ( + os.path.join(effective_t3_dir, "module"), + ", ".join( + repr(p) for p in os.listdir(os.path.join(effective_t3_dir, "module")) + ), + ) + ) sys.stderr.write("Main JAR was found: %r\n" % effective_t3_jar) sys.exit("Effective T3 model JAR file not found: %r" % effective_t3_model) -# We will have write access whereever the output should be, -temp_file = os.path.abspath(tabular_file + ".tmp") +# We will have write access wherever the output should be, +if tabular_file == "/dev/stdout": + temp_file = os.path.abspath("effectivet3_tabular_output.tmp") +else: + temp_file = os.path.abspath(tabular_file + ".tmp") # Use absolute paths since will change current directory... tabular_file = os.path.abspath(tabular_file) fasta_file = os.path.abspath(fasta_file) -cmd = ["java", "-jar", effective_t3_jar, - "-f", fasta_file, - "-m", model, - "-t", threshold, - "-o", temp_file, - "-q"] +cmd = [ + "java", + "-jar", + effective_t3_jar, + "-f", + fasta_file, + "-m", + model, + "-t", + threshold, + "-o", + temp_file, + "-q", +] try: # Must run from directory above the module subfolder: @@ -139,8 +260,7 @@ os.remove(temp_file) if errors: - print("%i sequences, %i positive, %i errors" - % (count, positive, errors)) + print("%i sequences, %i positive, %i errors" % (count, positive, errors)) else: print("%i/%i sequences positive" % (positive, count))