diff tools/protein_analysis/tmhmm2.py @ 1:3ff1dcbb9440

Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
author peterjc
date Tue, 07 Jun 2011 18:04:05 -0400
parents bca9bc7fdaef
children 6901298ac16c
line wrap: on
line diff
--- a/tools/protein_analysis/tmhmm2.py	Tue Jun 07 18:03:34 2011 -0400
+++ b/tools/protein_analysis/tmhmm2.py	Tue Jun 07 18:04:05 2011 -0400
@@ -29,6 +29,10 @@
 into chunks and running multiple copies of TMHMM in parallel. I would normally
 use Python's multiprocessing library in this situation but it requires at
 least Python 2.6 and at the time of writing Galaxy still supports Python 2.4.
+
+Also, tmhmm2 can fail without returning an error code, for example if run on a
+64-bit machine with only the 32-bit binaries installed. This script detects
+when there is no output from tmhmm2, and raises an error.
 """
 import sys
 import os
@@ -48,7 +52,8 @@
 tabular_file = sys.argv[3]
 
 def clean_tabular(raw_handle, out_handle):
-    """Clean up tabular TMHMM output."""
+    """Clean up tabular TMHMM output, returns output line count."""
+    count = 0
     for line in raw_handle:
         if not line:
             continue
@@ -68,9 +73,13 @@
         predhel = predhel[8:]
         assert topology.startswith("Topology="), line
         topology = topology[9:]
-	out_handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" \
+        out_handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" \
                    % (identifier, length, expAA, first60, predhel, topology))
+        count += 1
+    return count
 
+#Note that if the input FASTA file contains no sequences,
+#split_fasta returns an empty list (i.e. zero temp files).
 fasta_files = split_fasta(fasta_file, tabular_file, FASTA_CHUNK)
 temp_files = [f+".out" for f in fasta_files]
 jobs = ["tmhmm %s > %s" % (fasta, temp)
@@ -103,8 +112,12 @@
 out_handle.write("#ID\tlen\tExpAA\tFirst60\tPredHel\tTopology\n")
 for temp in temp_files:
     data_handle = open(temp)
-    clean_tabular(data_handle, out_handle)
+    count = clean_tabular(data_handle, out_handle)
     data_handle.close()
+    if not count:
+        clean_up(fasta_files)
+        clean_up(temp_files)
+        stop_err("No output from tmhmm2")
 out_handle.close()
 
 clean_up(fasta_files)