tmhmm_and_signalp: tools/protein_analysis/signalp3.py comparison

comparison tools/protein_analysis/signalp3.py @ 20:a19b3ded8f33 draft

v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes

author	peterjc
date	Thu, 21 Sep 2017 11:35:20 -0400
parents	f3ecd80850e2
children	238eae32483c

comparison

equal deleted inserted replaced

-:f3ecd80850e2
+:a19b3ded8f33
 itself (see the SignalP XML file for settings).
 Finally, you can opt to have a GFF3 file produced which will describe the
 predicted signal peptide and mature peptide for each protein (using one of
 the predictors which gives a cleavage site). *WORK IN PROGRESS*
-"""
+"""  # noqa: E501
+from __future__ import print_function
+import os
 import sys
-import os
 import tempfile
-from seq_analysis_utils import split_fasta, fasta_iterator
+from seq_analysis_utils import fasta_iterator, split_fasta
 from seq_analysis_utils import run_jobs, thread_count
 FASTA_CHUNK = 500
 MAX_LEN = 6000  # Found by trial and error
+if "-v" in sys.argv or "--version" in sys.argv:
+print("SignalP Galaxy wrapper version 0.0.19")
+sys.exit(os.system("signalp -version"))
 if len(sys.argv) not in [6, 8]:
 sys.exit("Require five (or 7) arguments, organism, truncate, threads, "
 "input protein FASTA file & output tabular file (plus "
 "optionally cut method and GFF3 output file). "
 tmp_dir = tempfile.mkdtemp()
-def clean_tabular(raw_handle, out_handle, gff_handle=None, cut_method=None):
+def clean_tabular(raw_handle, out_handle, gff_handle=None):
 """Clean up SignalP output to make it tabular."""
-if cut_method:
-cut_col = {"NN_Cmax": 2,
-"NN_Ymax": 5,
-"NN_Smax": 8,
-"HMM_Cmax": 16}[cut_method]
-else:
-cut_col = None
 for line in raw_handle:
 if not line or line.startswith("#"):
 continue
 parts = line.rstrip("\r\n").split()
 assert len(parts) == 21, repr(line)
 parts = parts[14:15] + parts[1:14] + parts[15:]
 out_handle.write("\t".join(parts) + "\n")
 def make_gff(fasta_file, tabular_file, gff_file, cut_method):
+"""Make a GFF file."""
 cut_col, score_col = {"NN_Cmax": (2, 1),
 "NN_Ymax": (5, 4),
 "NN_Smax": (8, 7),
 "HMM_Cmax": (16, 15),
 }[cut_method]
 # TODO - Why does it do this?
 cut = 1
 assert 1 <= cut <= len(seq), "%i for %s len %i" % (cut, seqid, len(seq))
 score = parts[score_col]
 gff_handle.write("##sequence-region %s %i %i\n"
 % (seqid, 1, len(seq)))
 # If the cut is at the very beginning, there is no signal peptide!
 if cut > 1:
 # signal_peptide = SO:0000418
 gff_handle.write("%s\t%s\t%s\t%i\t%i\t%s\t%s\t%s\t%s\n"
 % (seqid, source,
 try:
 os.rmdir(tmp_dir)
 except Exception:
 pass
 if len(jobs) > 1 and num_threads > 1:
 # A small "info" message for Galaxy to show the user.
-print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs))
+print("Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)))
 results = run_jobs(jobs, num_threads)
 assert len(fasta_files) == len(temp_files) == len(jobs)
 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs):
 error_level = results[cmd]
 try:
 output = open(temp).readline()
 except IOError:
 output = "(no output)"
 if error_level or output.lower().startswith("error running"):
 clean_up(fasta_files + temp_files)
-sys.exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
+if output:
-error_level)
+sys.stderr.write("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output))
+else:
+sys.stderr.write("One or more tasks failed, e.g. %i from %r with no output\n" % (error_level, cmd))
+sys.exit(error_level)
 del results
 out_handle = open(tabular_file, "w")
 fields = ["ID"]
 # NN results:

Mercurial > repos > peterjc > tmhmm_and_signalp

comparison tools/protein_analysis/signalp3.py @ 20:a19b3ded8f33 draft