Mercurial > repos > peterjc > tmhmm_and_signalp
comparison tools/protein_analysis/psortb.py @ 20:a19b3ded8f33 draft
v0.2.11 Job splitting fast-fail; RXLR tools support HMMER2 from BioConda; Capture more version information; misc internal changes
| author | peterjc |
|---|---|
| date | Thu, 21 Sep 2017 11:35:20 -0400 |
| parents | f3ecd80850e2 |
| children | 238eae32483c |
comparison
equal
deleted
inserted
replaced
| 19:f3ecd80850e2 | 20:a19b3ded8f33 |
|---|---|
| 19 itself (see the SignalP XML file for settings), but both can be applied. | 19 itself (see the SignalP XML file for settings), but both can be applied. |
| 20 | 20 |
| 21 Additionally it ensures the header line (with the column names) starts | 21 Additionally it ensures the header line (with the column names) starts |
| 22 with a # character as used elsewhere in Galaxy. | 22 with a # character as used elsewhere in Galaxy. |
| 23 """ | 23 """ |
| 24 | |
| 25 from __future__ import print_function | |
| 26 | |
| 27 import os | |
| 24 import sys | 28 import sys |
| 25 import os | |
| 26 import tempfile | 29 import tempfile |
| 27 from seq_analysis_utils import split_fasta, run_jobs, thread_count | 30 |
| 31 from seq_analysis_utils import run_jobs, split_fasta, thread_count | |
| 28 | 32 |
| 29 FASTA_CHUNK = 500 | 33 FASTA_CHUNK = 500 |
| 30 | 34 |
| 31 if "-v" in sys.argv or "--version" in sys.argv: | 35 if "-v" in sys.argv or "--version" in sys.argv: |
| 32 """Return underlying PSORTb's version""" | 36 """Return underlying PSORTb's version""" |
| 63 header = ['SeqID', 'CMSVM-_Localization', 'CMSVM-_Details', 'CytoSVM-_Localization', 'CytoSVM-_Details', | 67 header = ['SeqID', 'CMSVM-_Localization', 'CMSVM-_Details', 'CytoSVM-_Localization', 'CytoSVM-_Details', |
| 64 'ECSVM-_Localization', 'ECSVM-_Details', 'ModHMM-_Localization', 'ModHMM-_Details', | 68 'ECSVM-_Localization', 'ECSVM-_Details', 'ModHMM-_Localization', 'ModHMM-_Details', |
| 65 'Motif-_Localization', 'Motif-_Details', 'OMPMotif-_Localization', 'OMPMotif-_Details', | 69 'Motif-_Localization', 'Motif-_Details', 'OMPMotif-_Localization', 'OMPMotif-_Details', |
| 66 'OMSVM-_Localization', 'OMSVM-_Details', 'PPSVM-_Localization', 'PPSVM-_Details', | 70 'OMSVM-_Localization', 'OMSVM-_Details', 'PPSVM-_Localization', 'PPSVM-_Details', |
| 67 'Profile-_Localization', 'Profile-_Details', | 71 'Profile-_Localization', 'Profile-_Details', |
| 68 'SCL-BLAST-_Localization', 'SCL-BLAST-_Details', 'SCL-BLASTe-_Localization', 'SCL-BLASTe-_Details', | 72 'SCL-BLAST-_Localization', 'SCL-BLAST-_Details', |
| 73 'SCL-BLASTe-_Localization', 'SCL-BLASTe-_Details', | |
| 69 'Signal-_Localization', 'Signal-_Details', | 74 'Signal-_Localization', 'Signal-_Details', |
| 70 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Periplasmic_Score', 'OuterMembrane_Score', | 75 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Periplasmic_Score', 'OuterMembrane_Score', |
| 71 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', | 76 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', |
| 72 'Secondary_Localization', 'PSortb_Version'] | 77 'Secondary_Localization', 'PSortb_Version'] |
| 73 elif org_type == "-p": | 78 elif org_type == "-p": |
| 74 # Gram positive bacteria | 79 # Gram positive bacteria |
| 75 header = ['SeqID', 'CMSVM+_Localization', 'CMSVM+_Details', 'CWSVM+_Localization', 'CWSVM+_Details', | 80 header = ['SeqID', 'CMSVM+_Localization', 'CMSVM+_Details', 'CWSVM+_Localization', 'CWSVM+_Details', |
| 76 'CytoSVM+_Localization', 'CytoSVM+_Details', 'ECSVM+_Localization', 'ECSVM+_Details', | 81 'CytoSVM+_Localization', 'CytoSVM+_Details', 'ECSVM+_Localization', 'ECSVM+_Details', |
| 77 'ModHMM+_Localization', 'ModHMM+_Details', 'Motif+_Localization', 'Motif+_Details', | 82 'ModHMM+_Localization', 'ModHMM+_Details', 'Motif+_Localization', 'Motif+_Details', |
| 78 'Profile+_Localization', 'Profile+_Details', | 83 'Profile+_Localization', 'Profile+_Details', |
| 79 'SCL-BLAST+_Localization', 'SCL-BLAST+_Details', 'SCL-BLASTe+_Localization', 'SCL-BLASTe+_Details', | 84 'SCL-BLAST+_Localization', 'SCL-BLAST+_Details', |
| 85 'SCL-BLASTe+_Localization', 'SCL-BLASTe+_Details', | |
| 80 'Signal+_Localization', 'Signal+_Details', | 86 'Signal+_Localization', 'Signal+_Details', |
| 81 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Cellwall_Score', | 87 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Cellwall_Score', |
| 82 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', | 88 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', |
| 83 'Secondary_Localization', 'PSortb_Version'] | 89 'Secondary_Localization', 'PSortb_Version'] |
| 84 elif org_type == "-a": | 90 elif org_type == "-a": |
| 85 # Archaea | 91 # Archaea |
| 86 header = ['SeqID', 'CMSVM_a_Localization', 'CMSVM_a_Details', 'CWSVM_a_Localization', 'CWSVM_a_Details', | 92 header = ['SeqID', 'CMSVM_a_Localization', 'CMSVM_a_Details', 'CWSVM_a_Localization', 'CWSVM_a_Details', |
| 87 'CytoSVM_a_Localization', 'CytoSVM_a_Details', 'ECSVM_a_Localization', 'ECSVM_a_Details', | 93 'CytoSVM_a_Localization', 'CytoSVM_a_Details', 'ECSVM_a_Localization', 'ECSVM_a_Details', |
| 88 'ModHMM_a_Localization', 'ModHMM_a_Details', 'Motif_a_Localization', 'Motif_a_Details', | 94 'ModHMM_a_Localization', 'ModHMM_a_Details', 'Motif_a_Localization', 'Motif_a_Details', |
| 89 'Profile_a_Localization', 'Profile_a_Details', | 95 'Profile_a_Localization', 'Profile_a_Details', |
| 90 'SCL-BLAST_a_Localization', 'SCL-BLAST_a_Details', 'SCL-BLASTe_a_Localization', 'SCL-BLASTe_a_Details', | 96 'SCL-BLAST_a_Localization', 'SCL-BLAST_a_Details', |
| 97 'SCL-BLASTe_a_Localization', 'SCL-BLASTe_a_Details', | |
| 91 'Signal_a_Localization', 'Signal_a_Details', | 98 'Signal_a_Localization', 'Signal_a_Details', |
| 92 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Cellwall_Score', | 99 'Cytoplasmic_Score', 'CytoplasmicMembrane_Score', 'Cellwall_Score', |
| 93 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', | 100 'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score', |
| 94 'Secondary_Localization', 'PSortb_Version'] | 101 'Secondary_Localization', 'PSortb_Version'] |
| 95 else: | 102 else: |
| 120 "%i fields, not %i, in line:\n%r" % (len(line), len(header), line) | 127 "%i fields, not %i, in line:\n%r" % (len(line), len(header), line) |
| 121 out_handle.write(line) | 128 out_handle.write(line) |
| 122 count += 1 | 129 count += 1 |
| 123 return count | 130 return count |
| 124 | 131 |
| 132 | |
| 125 # Note that if the input FASTA file contains no sequences, | 133 # Note that if the input FASTA file contains no sequences, |
| 126 # split_fasta returns an empty list (i.e. zero temp files). | 134 # split_fasta returns an empty list (i.e. zero temp files). |
| 127 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK) | 135 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK) |
| 128 temp_files = [f + ".out" for f in fasta_files] | 136 temp_files = [f + ".out" for f in fasta_files] |
| 129 jobs = ["psort %s %s %s -o %s %s > %s" % (org_type, cutoff, divergent, out_type, fasta, temp) | 137 jobs = ["psort %s %s %s -o %s %s > %s" % (org_type, cutoff, divergent, out_type, fasta, temp) |
| 137 try: | 145 try: |
| 138 os.rmdir(tmp_dir) | 146 os.rmdir(tmp_dir) |
| 139 except Exception: | 147 except Exception: |
| 140 pass | 148 pass |
| 141 | 149 |
| 150 | |
| 142 if len(jobs) > 1 and num_threads > 1: | 151 if len(jobs) > 1 and num_threads > 1: |
| 143 # A small "info" message for Galaxy to show the user. | 152 # A small "info" message for Galaxy to show the user. |
| 144 print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) | 153 print("Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs))) |
| 145 results = run_jobs(jobs, num_threads) | 154 results = run_jobs(jobs, num_threads) |
| 146 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): | 155 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): |
| 147 error_level = results[cmd] | 156 error_level = results[cmd] |
| 148 if error_level: | 157 if error_level: |
| 149 try: | 158 try: |
| 165 data_handle.close() | 174 data_handle.close() |
| 166 if not count: | 175 if not count: |
| 167 clean_up(fasta_files + temp_files) | 176 clean_up(fasta_files + temp_files) |
| 168 sys.exit("No output from psortb") | 177 sys.exit("No output from psortb") |
| 169 out_handle.close() | 178 out_handle.close() |
| 170 print "%i records" % count | 179 print("%i records" % count) |
| 171 | 180 |
| 172 clean_up(fasta_files + temp_files) | 181 clean_up(fasta_files + temp_files) |
