Mercurial > repos > peterjc > tmhmm_and_signalp
comparison tools/protein_analysis/tmhmm2.py @ 1:3ff1dcbb9440
Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository
author | peterjc |
---|---|
date | Tue, 07 Jun 2011 18:04:05 -0400 |
parents | bca9bc7fdaef |
children | 6901298ac16c |
comparison (legend for the side-by-side diff below)
- equal
- deleted
- inserted
- replaced
0:bca9bc7fdaef | 1:3ff1dcbb9440 |
---|---|
27 The second major potential feature is taking advantage of multiple cores | 27 The second major potential feature is taking advantage of multiple cores |
28 (since TMHMM v2.0 itself is single threaded) by dividing the input FASTA file | 28 (since TMHMM v2.0 itself is single threaded) by dividing the input FASTA file |
29 into chunks and running multiple copies of TMHMM in parallel. I would normally | 29 into chunks and running multiple copies of TMHMM in parallel. I would normally |
30 use Python's multiprocessing library in this situation but it requires at | 30 use Python's multiprocessing library in this situation but it requires at |
31 least Python 2.6 and at the time of writing Galaxy still supports Python 2.4. | 31 least Python 2.6 and at the time of writing Galaxy still supports Python 2.4. |
32 | |
33 Also tmhmm2 can fail without returning an error code, for example if run on a | |
34 64 bit machine with only the 32 bit binaries installed. This script will spot | |
35 when there is no output from tmhmm2, and raise an error. | |
32 """ | 36 """ |
33 import sys | 37 import sys |
34 import os | 38 import os |
35 from seq_analysis_utils import stop_err, split_fasta, run_jobs | 39 from seq_analysis_utils import stop_err, split_fasta, run_jobs |
36 | 40 |
46 stop_err("Threads argument %s is not a positive integer" % sys.argv[1]) | 50 stop_err("Threads argument %s is not a positive integer" % sys.argv[1]) |
47 fasta_file = sys.argv[2] | 51 fasta_file = sys.argv[2] |
48 tabular_file = sys.argv[3] | 52 tabular_file = sys.argv[3] |
49 | 53 |
50 def clean_tabular(raw_handle, out_handle): | 54 def clean_tabular(raw_handle, out_handle): |
51 """Clean up tabular TMHMM output.""" | 55 """Clean up tabular TMHMM output, returns output line count.""" |
56 count = 0 | |
52 for line in raw_handle: | 57 for line in raw_handle: |
53 if not line: | 58 if not line: |
54 continue | 59 continue |
55 parts = line.rstrip("\r\n").split("\t") | 60 parts = line.rstrip("\r\n").split("\t") |
56 try: | 61 try: |
66 first60 = first60[8:] | 71 first60 = first60[8:] |
67 assert predhel.startswith("PredHel="), line | 72 assert predhel.startswith("PredHel="), line |
68 predhel = predhel[8:] | 73 predhel = predhel[8:] |
69 assert topology.startswith("Topology="), line | 74 assert topology.startswith("Topology="), line |
70 topology = topology[9:] | 75 topology = topology[9:] |
71 out_handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" \ | 76 out_handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" \ |
72 % (identifier, length, expAA, first60, predhel, topology)) | 77 % (identifier, length, expAA, first60, predhel, topology)) |
78 count += 1 | |
79 return count | |
73 | 80 |
81 #Note that if the input FASTA file contains no sequences, | |
82 #split_fasta returns an empty list (i.e. zero temp files). | |
74 fasta_files = split_fasta(fasta_file, tabular_file, FASTA_CHUNK) | 83 fasta_files = split_fasta(fasta_file, tabular_file, FASTA_CHUNK) |
75 temp_files = [f+".out" for f in fasta_files] | 84 temp_files = [f+".out" for f in fasta_files] |
76 jobs = ["tmhmm %s > %s" % (fasta, temp) | 85 jobs = ["tmhmm %s > %s" % (fasta, temp) |
77 for fasta, temp in zip(fasta_files, temp_files)] | 86 for fasta, temp in zip(fasta_files, temp_files)] |
78 | 87 |
101 | 110 |
102 out_handle = open(tabular_file, "w") | 111 out_handle = open(tabular_file, "w") |
103 out_handle.write("#ID\tlen\tExpAA\tFirst60\tPredHel\tTopology\n") | 112 out_handle.write("#ID\tlen\tExpAA\tFirst60\tPredHel\tTopology\n") |
104 for temp in temp_files: | 113 for temp in temp_files: |
105 data_handle = open(temp) | 114 data_handle = open(temp) |
106 clean_tabular(data_handle, out_handle) | 115 count = clean_tabular(data_handle, out_handle) |
107 data_handle.close() | 116 data_handle.close() |
117 if not count: | |
118 clean_up(fasta_files) | |
119 clean_up(temp_files) | |
120 stop_err("No output from tmhmm2") | |
108 out_handle.close() | 121 out_handle.close() |
109 | 122 |
110 clean_up(fasta_files) | 123 clean_up(fasta_files) |
111 clean_up(temp_files) | 124 clean_up(temp_files) |