Mercurial > repos > peterjc > tmhmm_and_signalp
comparison tools/protein_analysis/psortb.py @ 20:a19b3ded8f33 draft
v0.2.11 Job splitting fast-fail; RXLR tools support HMMER2 from BioConda; Capture more version information; misc internal changes
author | peterjc |
---|---|
date | Thu, 21 Sep 2017 11:35:20 -0400 |
parents | f3ecd80850e2 |
children | 238eae32483c |
comparison
equal
deleted
inserted
replaced
19:f3ecd80850e2 | 20:a19b3ded8f33 |
---|---|
19 itself (see the SignalP XML file for settings), but both can be applied. | 19 itself (see the SignalP XML file for settings), but both can be applied. |
20 | 20 |
21 Additionally it ensures the header line (with the column names) starts | 21 Additionally it ensures the header line (with the column names) starts |
22 with a # character as used elsewhere in Galaxy. | 22 with a # character as used elsewhere in Galaxy. |
23 """ | 23 """ |
24 | |
25 from __future__ import print_function | |
26 | |
27 import os | |
24 import sys | 28 import sys |
25 import os | |
26 import tempfile | 29 import tempfile |
27 from seq_analysis_utils import split_fasta, run_jobs, thread_count | 30 |
31 from seq_analysis_utils import run_jobs, split_fasta, thread_count | |
28 | 32 |
29 FASTA_CHUNK = 500 | 33 FASTA_CHUNK = 500 |
30 | 34 |
31 if "-v" in sys.argv or "--version" in sys.argv: | 35 if "-v" in sys.argv or "--version" in sys.argv: |
32 """Return underlying PSORTb's version""" | 36 """Return underlying PSORTb's version""" |
63 header = ['SeqID', 'CMSVM-_Localization', 'CMSVM-_Details', 'CytoSVM-_Localization', 'CytoSVM-_Details', | 67 header = ['SeqID', 'CMSVM-_Localization', 'CMSVM-_Details', 'CytoSVM-_Localization', 'CytoSVM-_Details', |
64 'ECSVM-_Localization', 'ECSVM-_Details', 'ModHMM-_Localization', 'ModHMM-_Details', | 68 'ECSVM-_Localization', 'ECSVM-_Details', 'ModHMM-_Localization', 'ModHMM-_Details', |
65 'Motif-_Localization', 'Motif-_Details', 'OMPMotif-_Localization', 'OMPMotif-_Details', | 69 'Motif-_Localization', 'Motif-_Details', 'OMPMotif-_Localization', 'OMPMotif-_Details', |
66 'OMSVM-_Localization', 'OMSVM-_Details', 'PPSVM-_Localization', 'PPSVM-_Details', | 70 'OMSVM-_Localization', 'OMSVM-_Details', 'PPSVM-_Localization', 'PPSVM-_Details', |
67 'Profile-_Localization', 'Profile-_Details', | 71 'Profile-_Localization', 'Profile-_Details', |
68 'SCL-BLAST-_Localization', 'SCL-BLAST-_Details', 'SCL-BLASTe-_Localization', 'SCL-BLASTe-_Details', | 72 'SCL-BLAST-_Localization', 'SCL-BLAST-_Details', |
73 'SCL-BLASTe-_Localization', 'SCL-BLASTe-_Details', | |
69 'Signal-_Localization', 'Signal-_Details', | 74 'Signal-_Localization', 'Signal-_Details', |
70 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Periplasmic_Score', 'OuterMembrane_Score', | 75 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Periplasmic_Score', 'OuterMembrane_Score', |
71 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', | 76 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', |
72 'Secondary_Localization', 'PSortb_Version'] | 77 'Secondary_Localization', 'PSortb_Version'] |
73 elif org_type == "-p": | 78 elif org_type == "-p": |
74 # Gram positive bacteria | 79 # Gram positive bacteria |
75 header = ['SeqID', 'CMSVM+_Localization', 'CMSVM+_Details', 'CWSVM+_Localization', 'CWSVM+_Details', | 80 header = ['SeqID', 'CMSVM+_Localization', 'CMSVM+_Details', 'CWSVM+_Localization', 'CWSVM+_Details', |
76 'CytoSVM+_Localization', 'CytoSVM+_Details', 'ECSVM+_Localization', 'ECSVM+_Details', | 81 'CytoSVM+_Localization', 'CytoSVM+_Details', 'ECSVM+_Localization', 'ECSVM+_Details', |
77 'ModHMM+_Localization', 'ModHMM+_Details', 'Motif+_Localization', 'Motif+_Details', | 82 'ModHMM+_Localization', 'ModHMM+_Details', 'Motif+_Localization', 'Motif+_Details', |
78 'Profile+_Localization', 'Profile+_Details', | 83 'Profile+_Localization', 'Profile+_Details', |
79 'SCL-BLAST+_Localization', 'SCL-BLAST+_Details', 'SCL-BLASTe+_Localization', 'SCL-BLASTe+_Details', | 84 'SCL-BLAST+_Localization', 'SCL-BLAST+_Details', |
85 'SCL-BLASTe+_Localization', 'SCL-BLASTe+_Details', | |
80 'Signal+_Localization', 'Signal+_Details', | 86 'Signal+_Localization', 'Signal+_Details', |
81 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Cellwall_Score', | 87 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Cellwall_Score', |
82 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', | 88 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', |
83 'Secondary_Localization', 'PSortb_Version'] | 89 'Secondary_Localization', 'PSortb_Version'] |
84 elif org_type == "-a": | 90 elif org_type == "-a": |
85 # Archaea | 91 # Archaea |
86 header = ['SeqID', 'CMSVM_a_Localization', 'CMSVM_a_Details', 'CWSVM_a_Localization', 'CWSVM_a_Details', | 92 header = ['SeqID', 'CMSVM_a_Localization', 'CMSVM_a_Details', 'CWSVM_a_Localization', 'CWSVM_a_Details', |
87 'CytoSVM_a_Localization', 'CytoSVM_a_Details', 'ECSVM_a_Localization', 'ECSVM_a_Details', | 93 'CytoSVM_a_Localization', 'CytoSVM_a_Details', 'ECSVM_a_Localization', 'ECSVM_a_Details', |
88 'ModHMM_a_Localization', 'ModHMM_a_Details', 'Motif_a_Localization', 'Motif_a_Details', | 94 'ModHMM_a_Localization', 'ModHMM_a_Details', 'Motif_a_Localization', 'Motif_a_Details', |
89 'Profile_a_Localization', 'Profile_a_Details', | 95 'Profile_a_Localization', 'Profile_a_Details', |
90 'SCL-BLAST_a_Localization', 'SCL-BLAST_a_Details', 'SCL-BLASTe_a_Localization', 'SCL-BLASTe_a_Details', | 96 'SCL-BLAST_a_Localization', 'SCL-BLAST_a_Details', |
97 'SCL-BLASTe_a_Localization', 'SCL-BLASTe_a_Details', | |
91 'Signal_a_Localization', 'Signal_a_Details', | 98 'Signal_a_Localization', 'Signal_a_Details', |
92 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Cellwall_Score', | 99 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Cellwall_Score', |
93 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', | 100 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', |
94 'Secondary_Localization', 'PSortb_Version'] | 101 'Secondary_Localization', 'PSortb_Version'] |
95 else: | 102 else: |
120 "%i fields, not %i, in line:\n%r" % (len(line), len(header), line) | 127 "%i fields, not %i, in line:\n%r" % (len(line), len(header), line) |
121 out_handle.write(line) | 128 out_handle.write(line) |
122 count += 1 | 129 count += 1 |
123 return count | 130 return count |
124 | 131 |
132 | |
125 # Note that if the input FASTA file contains no sequences, | 133 # Note that if the input FASTA file contains no sequences, |
126 # split_fasta returns an empty list (i.e. zero temp files). | 134 # split_fasta returns an empty list (i.e. zero temp files). |
127 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK) | 135 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK) |
128 temp_files = [f + ".out" for f in fasta_files] | 136 temp_files = [f + ".out" for f in fasta_files] |
129 jobs = ["psort %s %s %s -o %s %s > %s" % (org_type, cutoff, divergent, out_type, fasta, temp) | 137 jobs = ["psort %s %s %s -o %s %s > %s" % (org_type, cutoff, divergent, out_type, fasta, temp) |
137 try: | 145 try: |
138 os.rmdir(tmp_dir) | 146 os.rmdir(tmp_dir) |
139 except Exception: | 147 except Exception: |
140 pass | 148 pass |
141 | 149 |
150 | |
142 if len(jobs) > 1 and num_threads > 1: | 151 if len(jobs) > 1 and num_threads > 1: |
143 # A small "info" message for Galaxy to show the user. | 152 # A small "info" message for Galaxy to show the user. |
144 print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) | 153 print("Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs))) |
145 results = run_jobs(jobs, num_threads) | 154 results = run_jobs(jobs, num_threads) |
146 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): | 155 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): |
147 error_level = results[cmd] | 156 error_level = results[cmd] |
148 if error_level: | 157 if error_level: |
149 try: | 158 try: |
165 data_handle.close() | 174 data_handle.close() |
166 if not count: | 175 if not count: |
167 clean_up(fasta_files + temp_files) | 176 clean_up(fasta_files + temp_files) |
168 sys.exit("No output from psortb") | 177 sys.exit("No output from psortb") |
169 out_handle.close() | 178 out_handle.close() |
170 print "%i records" % count | 179 print("%i records" % count) |
171 | 180 |
172 clean_up(fasta_files + temp_files) | 181 clean_up(fasta_files + temp_files) |