comparison tools/protein_analysis/psortb.py @ 18:eb6ac44d4b8e draft

Suite v0.2.8, record Promoter 2 version + misc internal updates
author peterjc
date Tue, 01 Sep 2015 09:56:36 -0400
parents 99b82a2b1272
children f3ecd80850e2
comparison
equal deleted inserted replaced
17:e6cc27d182a8 18:eb6ac44d4b8e
22 with a # character as used elsewhere in Galaxy. 22 with a # character as used elsewhere in Galaxy.
23 """ 23 """
24 import sys 24 import sys
25 import os 25 import os
26 import tempfile 26 import tempfile
27 from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count 27 from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count
28 28
29 FASTA_CHUNK = 500 29 FASTA_CHUNK = 500
30 30
31 if "-v" in sys.argv or "--version" in sys.argv: 31 if "-v" in sys.argv or "--version" in sys.argv:
32 """Return underlying PSORTb's version""" 32 """Return underlying PSORTb's version"""
33 sys.exit(os.system("psort --version")) 33 sys.exit(os.system("psort --version"))
34 34
35 if len(sys.argv) != 8: 35 if len(sys.argv) != 8:
36 stop_err("Require 7 arguments, number of threads (int), type (e.g. archaea), " 36 sys_exit("Require 7 arguments, number of threads (int), type (e.g. archaea), "
37 "output (e.g. terse/normal/long), cutoff, divergent, input protein " 37 "output (e.g. terse/normal/long), cutoff, divergent, input protein "
38 "FASTA file & output tabular file") 38 "FASTA file & output tabular file")
39 39
40 num_threads = thread_count(sys.argv[1], default=4) 40 num_threads = thread_count(sys.argv[1], default=4)
41 org_type = sys.argv[2] 41 org_type = sys.argv[2]
54 tabular_file = sys.argv[7] 54 tabular_file = sys.argv[7]
55 55
56 if out_type == "terse": 56 if out_type == "terse":
57 header = ['SeqID', 'Localization', 'Score'] 57 header = ['SeqID', 'Localization', 'Score']
58 elif out_type == "normal": 58 elif out_type == "normal":
59 stop_err("Normal output not implemented yet, sorry.") 59 sys_exit("Normal output not implemented yet, sorry.")
60 elif out_type == "long": 60 elif out_type == "long":
61 if org_type == "-n": 61 if org_type == "-n":
62 #Gram negative bacteria 62 #Gram negative bacteria
63 header = ['SeqID', 'CMSVM-_Localization', 'CMSVM-_Details', 'CytoSVM-_Localization', 'CytoSVM-_Details', 63 header = ['SeqID', 'CMSVM-_Localization', 'CMSVM-_Details', 'CytoSVM-_Localization', 'CytoSVM-_Details',
64 'ECSVM-_Localization', 'ECSVM-_Details', 'ModHMM-_Localization', 'ModHMM-_Details', 64 'ECSVM-_Localization', 'ECSVM-_Details', 'ModHMM-_Localization', 'ModHMM-_Details',
91 'Signal_a_Localization', 'Signal_a_Details', 91 'Signal_a_Localization', 'Signal_a_Details',
92 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Cellwall_Score', 92 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Cellwall_Score',
93 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', 93 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score',
94 'Secondary_Localization', 'PSortb_Version'] 94 'Secondary_Localization', 'PSortb_Version']
95 else: 95 else:
96 stop_err("Expected -n, -p or -a for the organism type, not %r" % org_type) 96 sys_exit("Expected -n, -p or -a for the organism type, not %r" % org_type)
97 else: 97 else:
98 stop_err("Expected terse, normal or long for the output type, not %r" % out_type) 98 sys_exit("Expected terse, normal or long for the output type, not %r" % out_type)
99 99
100 tmp_dir = tempfile.mkdtemp() 100 tmp_dir = tempfile.mkdtemp()
101 101
102 def clean_tabular(raw_handle, out_handle): 102 def clean_tabular(raw_handle, out_handle):
103 """Clean up tabular TMHMM output, returns output line count.""" 103 """Clean up tabular TMHMM output, returns output line count."""
147 try: 147 try:
148 output = open(temp).readline() 148 output = open(temp).readline()
149 except IOError: 149 except IOError:
150 output = "" 150 output = ""
151 clean_up(fasta_files + temp_files) 151 clean_up(fasta_files + temp_files)
152 stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), 152 sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
153 error_level) 153 error_level)
154 del results 154 del results
155 del jobs 155 del jobs
156 156
157 out_handle = open(tabular_file, "w") 157 out_handle = open(tabular_file, "w")
161 data_handle = open(temp) 161 data_handle = open(temp)
162 count += clean_tabular(data_handle, out_handle) 162 count += clean_tabular(data_handle, out_handle)
163 data_handle.close() 163 data_handle.close()
164 if not count: 164 if not count:
165 clean_up(fasta_files + temp_files) 165 clean_up(fasta_files + temp_files)
166 stop_err("No output from psortb") 166 sys_exit("No output from psortb")
167 out_handle.close() 167 out_handle.close()
168 print "%i records" % count 168 print "%i records" % count
169 169
170 clean_up(fasta_files + temp_files) 170 clean_up(fasta_files + temp_files)