Mercurial > repos > peterjc > tmhmm_and_signalp
diff tools/protein_analysis/promoter2.py @ 20:a19b3ded8f33 draft
v0.2.11 Job splitting fast-fail; RXLR tools support HMMER2 from BioConda; Capture more version information; misc internal changes
author | peterjc |
---|---|
date | Thu, 21 Sep 2017 11:35:20 -0400 |
parents | f3ecd80850e2 |
children | 238eae32483c |
line wrap: on
line diff
--- a/tools/protein_analysis/promoter2.py Wed Feb 01 09:46:42 2017 -0500 +++ b/tools/protein_analysis/promoter2.py Thu Sep 21 11:35:20 2017 -0400 @@ -18,7 +18,6 @@ Additionally, in order to take advantage of multiple cores the input FASTA file is broken into chunks and multiple copies of promoter run at once. This can be used in combination with the job-splitting available in Galaxy. - Note that rewriting the FASTA input file allows us to avoid a bug in promoter 2 with long descriptions in the FASTA header line (over 200 characters) which produces stray fragements of the description in the @@ -26,11 +25,15 @@ TODO - Automatically extract the sequence containing a promoter prediction? """ -import sys -import os + +from __future__ import print_function + import commands +import os +import sys import tempfile -from seq_analysis_utils import split_fasta, run_jobs, thread_count + +from seq_analysis_utils import run_jobs, split_fasta, thread_count FASTA_CHUNK = 500 @@ -49,6 +52,7 @@ def get_path_and_binary(): + """Determine path and binary names for promoter tool.""" platform = commands.getoutput("uname") # e.g. 
Linux shell_script = commands.getoutput("which promoter") if not os.path.isfile(shell_script): @@ -74,7 +78,7 @@ identifier = None queries = 0 for line in raw_handle: - # print repr(line) + # print(repr(line)) if not line.strip() or line == "Promoter prediction:\n": pass elif line[0] != " ": @@ -89,7 +93,7 @@ try: position, score, likelihood = line.strip().split(None, 2) except ValueError: - print "WARNING: Problem with line: %r" % line + print("WARNING: Problem with line: %r" % line) continue # sys.exit("ERROR: Problem with line: %r" % line) if likelihood not in ["ignored", @@ -100,6 +104,7 @@ out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood)) return queries + working_dir, bin = get_path_and_binary() if not os.path.isfile(fasta_file): @@ -124,9 +129,10 @@ except Exception: pass + if len(jobs) > 1 and num_threads > 1: # A small "info" message for Galaxy to show the user. - print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) + print("Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs))) cur_dir = os.path.abspath(os.curdir) os.chdir(working_dir) results = run_jobs(jobs, num_threads) @@ -159,4 +165,4 @@ out_handle.close() clean_up(fasta_files + temp_files) -print "Results for %i queries" % queries +print("Results for %i queries" % queries)