Mercurial > repos > peterjc > tmhmm_and_signalp
diff tools/protein_analysis/tmhmm2.py @ 1:3ff1dcbb9440
Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
author | peterjc |
---|---|
date | Tue, 07 Jun 2011 18:04:05 -0400 |
parents | bca9bc7fdaef |
children | 6901298ac16c |
line wrap: on
line diff
--- a/tools/protein_analysis/tmhmm2.py Tue Jun 07 18:03:34 2011 -0400 +++ b/tools/protein_analysis/tmhmm2.py Tue Jun 07 18:04:05 2011 -0400 @@ -29,6 +29,10 @@ into chunks and running multiple copies of TMHMM in parallel. I would normally use Python's multiprocessing library in this situation but it requires at least Python 2.6 and at the time of writing Galaxy still supports Python 2.4. + +Also tmhmm2 can fail without returning an error code, for example if run on a +64 bit machine with only the 32 bit binaries installed. This script will spot +when there is no output from tmhmm2, and raise an error. """ import sys import os @@ -48,7 +52,8 @@ tabular_file = sys.argv[3] def clean_tabular(raw_handle, out_handle): - """Clean up tabular TMHMM output.""" + """Clean up tabular TMHMM output, returns output line count.""" + count = 0 for line in raw_handle: if not line: continue @@ -68,9 +73,13 @@ predhel = predhel[8:] assert topology.startswith("Topology="), line topology = topology[9:] - out_handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" \ + out_handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" \ % (identifier, length, expAA, first60, predhel, topology)) + count += 1 + return count +#Note that if the input FASTA file contains no sequences, +#split_fasta returns an empty list (i.e. zero temp files). fasta_files = split_fasta(fasta_file, tabular_file, FASTA_CHUNK) temp_files = [f+".out" for f in fasta_files] jobs = ["tmhmm %s > %s" % (fasta, temp) @@ -103,8 +112,12 @@ out_handle.write("#ID\tlen\tExpAA\tFirst60\tPredHel\tTopology\n") for temp in temp_files: data_handle = open(temp) - clean_tabular(data_handle, out_handle) + count = clean_tabular(data_handle, out_handle) data_handle.close() + if not count: + clean_up(fasta_files) + clean_up(temp_files) + stop_err("No output from tmhmm2") out_handle.close() clean_up(fasta_files)