Mercurial > repos > peterjc > tmhmm_and_signalp
diff tools/protein_analysis/signalp3.py @ 20:a19b3ded8f33 draft
v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
author | peterjc |
---|---|
date | Thu, 21 Sep 2017 11:35:20 -0400 |
parents | f3ecd80850e2 |
children | 238eae32483c |
line wrap: on
line diff
--- a/tools/protein_analysis/signalp3.py Wed Feb 01 09:46:42 2017 -0500 +++ b/tools/protein_analysis/signalp3.py Thu Sep 21 11:35:20 2017 -0400 @@ -52,16 +52,24 @@ Finally, you can opt to have a GFF3 file produced which will describe the predicted signal peptide and mature peptide for each protein (using one of the predictors which gives a cleavage site). *WORK IN PROGRESS* -""" -import sys +""" # noqa: E501 + +from __future__ import print_function + import os +import sys import tempfile -from seq_analysis_utils import split_fasta, fasta_iterator + +from seq_analysis_utils import fasta_iterator, split_fasta from seq_analysis_utils import run_jobs, thread_count FASTA_CHUNK = 500 MAX_LEN = 6000 # Found by trial and error +if "-v" in sys.argv or "--version" in sys.argv: + print("SignalP Galaxy wrapper version 0.0.19") + sys.exit(os.system("signalp -version")) + if len(sys.argv) not in [6, 8]: sys.exit("Require five (or 7) arguments, organism, truncate, threads, " "input protein FASTA file & output tabular file (plus " @@ -96,15 +104,8 @@ tmp_dir = tempfile.mkdtemp() -def clean_tabular(raw_handle, out_handle, gff_handle=None, cut_method=None): +def clean_tabular(raw_handle, out_handle, gff_handle=None): """Clean up SignalP output to make it tabular.""" - if cut_method: - cut_col = {"NN_Cmax": 2, - "NN_Ymax": 5, - "NN_Smax": 8, - "HMM_Cmax": 16}[cut_method] - else: - cut_col = None for line in raw_handle: if not line or line.startswith("#"): continue @@ -119,6 +120,7 @@ def make_gff(fasta_file, tabular_file, gff_file, cut_method): + """Make a GFF file.""" cut_col, score_col = {"NN_Cmax": (2, 1), "NN_Ymax": (5, 4), "NN_Smax": (8, 7), @@ -152,7 +154,7 @@ assert 1 <= cut <= len(seq), "%i for %s len %i" % (cut, seqid, len(seq)) score = parts[score_col] gff_handle.write("##sequence-region %s %i %i\n" - % (seqid, 1, len(seq))) + % (seqid, 1, len(seq))) # If the cut is at the very begining, there is no signal peptide! if cut > 1: # signal_peptide = SO:0000418 @@ -188,9 +190,10 @@ except Exception: pass + if len(jobs) > 1 and num_threads > 1: # A small "info" message for Galaxy to show the user. - print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) + print("Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs))) results = run_jobs(jobs, num_threads) assert len(fasta_files) == len(temp_files) == len(jobs) for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): @@ -201,8 +204,11 @@ output = "(no output)" if error_level or output.lower().startswith("error running"): clean_up(fasta_files + temp_files) - sys.exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), - error_level) + if output: + sys.stderr.write("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output)) + else: + sys.stderr.write("One or more tasks failed, e.g. %i from %r with no output\n" % (error_level, cmd)) + sys.exit(error_level) del results out_handle = open(tabular_file, "w")