Mercurial > repos > peterjc > tmhmm_and_signalp
comparison tools/protein_analysis/promoter2.py @ 18:eb6ac44d4b8e draft
Suite v0.2.8, record Promoter 2 version + misc internal updates
| author | peterjc |
|---|---|
| date | Tue, 01 Sep 2015 09:56:36 -0400 |
| parents | e52220a9ddad |
| children | f3ecd80850e2 |
comparison
equal
deleted
inserted
replaced
| 17:e6cc27d182a8 | 18:eb6ac44d4b8e |
|---|---|
| 28 """ | 28 """ |
| 29 import sys | 29 import sys |
| 30 import os | 30 import os |
| 31 import commands | 31 import commands |
| 32 import tempfile | 32 import tempfile |
| 33 from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count | 33 from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count |
| 34 | 34 |
| 35 FASTA_CHUNK = 500 | 35 FASTA_CHUNK = 500 |
| 36 | 36 |
| 37 if "-v" in sys.argv or "--version" in sys.argv: | |
| 38 sys.exit(os.system("promoter -V")) | |
| 39 | |
| 37 if len(sys.argv) != 4: | 40 if len(sys.argv) != 4: |
| 38 stop_err("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. " | 41 sys_exit("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. " |
| 39 "Got %i arguments." % (len(sys.argv)-1)) | 42 "Got %i arguments." % (len(sys.argv)-1)) |
| 40 | 43 |
| 41 num_threads = thread_count(sys.argv[3],default=4) | 44 num_threads = thread_count(sys.argv[3],default=4) |
| 42 fasta_file = os.path.abspath(sys.argv[2]) | 45 fasta_file = os.path.abspath(sys.argv[2]) |
| 43 tabular_file = os.path.abspath(sys.argv[3]) | 46 tabular_file = os.path.abspath(sys.argv[3]) |
| 46 | 49 |
| 47 def get_path_and_binary(): | 50 def get_path_and_binary(): |
| 48 platform = commands.getoutput("uname") #e.g. Linux | 51 platform = commands.getoutput("uname") #e.g. Linux |
| 49 shell_script = commands.getoutput("which promoter") | 52 shell_script = commands.getoutput("which promoter") |
| 50 if not os.path.isfile(shell_script): | 53 if not os.path.isfile(shell_script): |
| 51 stop_err("ERROR: Missing promoter executable shell script") | 54 sys_exit("ERROR: Missing promoter executable shell script") |
| 52 path = None | 55 path = None |
| 53 for line in open(shell_script): | 56 for line in open(shell_script): |
| 54 if line.startswith("setenv"): #could then be tab or space! | 57 if line.startswith("setenv"): #could then be tab or space! |
| 55 parts = line.rstrip().split(None, 2) | 58 parts = line.rstrip().split(None, 2) |
| 56 if parts[0] == "setenv" and parts[1] == "PROM": | 59 if parts[0] == "setenv" and parts[1] == "PROM": |
| 57 path = parts[2] | 60 path = parts[2] |
| 58 if not path: | 61 if not path: |
| 59 stop_err("ERROR: Could not find promoter path (PROM) in %r" % shell_script) | 62 sys_exit("ERROR: Could not find promoter path (PROM) in %r" % shell_script) |
| 60 if not os.path.isdir(path): | 63 if not os.path.isdir(path): |
| 61 stop_error("ERROR: %r is not a directory" % path) | 64 sys_exit("ERROR: %r is not a directory" % path) |
| 62 bin = "%s/bin/promoter_%s" % (path, platform) | 65 bin = "%s/bin/promoter_%s" % (path, platform) |
| 63 if not os.path.isfile(bin): | 66 if not os.path.isfile(bin): |
| 64 stop_err("ERROR: Missing promoter binary %r" % bin) | 67 sys_exit("ERROR: Missing promoter binary %r" % bin) |
| 65 return path, bin | 68 return path, bin |
| 66 | 69 |
| 67 def make_tabular(raw_handle, out_handle): | 70 def make_tabular(raw_handle, out_handle): |
| 68 """Parse text output into tabular, return query count.""" | 71 """Parse text output into tabular, return query count.""" |
| 69 identifier = None | 72 identifier = None |
| 84 try: | 87 try: |
| 85 position, score, likelihood = line.strip().split(None,2) | 88 position, score, likelihood = line.strip().split(None,2) |
| 86 except ValueError: | 89 except ValueError: |
| 87 print "WARNING: Problem with line: %r" % line | 90 print "WARNING: Problem with line: %r" % line |
| 88 continue | 91 continue |
| 89 #stop_err("ERROR: Problem with line: %r" % line) | 92 #sys_exit("ERROR: Problem with line: %r" % line) |
| 90 if likelihood not in ["ignored", | 93 if likelihood not in ["ignored", |
| 91 "Marginal prediction", | 94 "Marginal prediction", |
| 92 "Medium likely prediction", | 95 "Medium likely prediction", |
| 93 "Highly likely prediction"]: | 96 "Highly likely prediction"]: |
| 94 stop_err("ERROR: Problem with line: %r" % line) | 97 sys_exit("ERROR: Problem with line: %r" % line) |
| 95 out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood)) | 98 out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood)) |
| 96 return queries | 99 return queries |
| 97 | 100 |
| 98 working_dir, bin = get_path_and_binary() | 101 working_dir, bin = get_path_and_binary() |
| 99 | 102 |
| 100 if not os.path.isfile(fasta_file): | 103 if not os.path.isfile(fasta_file): |
| 101 stop_err("ERROR: Missing input FASTA file %r" % fasta_file) | 104 sys_exit("ERROR: Missing input FASTA file %r" % fasta_file) |
| 102 | 105 |
| 103 #Note that if the input FASTA file contains no sequences, | 106 #Note that if the input FASTA file contains no sequences, |
| 104 #split_fasta returns an empty list (i.e. zero temp files). | 107 #split_fasta returns an empty list (i.e. zero temp files). |
| 105 #We deliberately omit the FASTA descriptions to avoid a | 108 #We deliberately omit the FASTA descriptions to avoid a |
| 106 #bug in promoter2 with descriptions over 200 characters. | 109 #bug in promoter2 with descriptions over 200 characters. |
| 131 try: | 134 try: |
| 132 output = open(temp).readline() | 135 output = open(temp).readline() |
| 133 except IOError: | 136 except IOError: |
| 134 output = "" | 137 output = "" |
| 135 clean_up(fasta_files + temp_files) | 138 clean_up(fasta_files + temp_files) |
| 136 stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), | 139 sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), |
| 137 error_level) | 140 error_level) |
| 138 | 141 |
| 139 del results | 142 del results |
| 140 del jobs | 143 del jobs |
| 141 | 144 |
| 146 data_handle = open(temp) | 149 data_handle = open(temp) |
| 147 count = make_tabular(data_handle, out_handle) | 150 count = make_tabular(data_handle, out_handle) |
| 148 data_handle.close() | 151 data_handle.close() |
| 149 if not count: | 152 if not count: |
| 150 clean_up(fasta_files + temp_files) | 153 clean_up(fasta_files + temp_files) |
| 151 stop_err("No output from promoter2") | 154 sys_exit("No output from promoter2") |
| 152 queries += count | 155 queries += count |
| 153 out_handle.close() | 156 out_handle.close() |
| 154 | 157 |
| 155 clean_up(fasta_files + temp_files) | 158 clean_up(fasta_files + temp_files) |
| 156 print "Results for %i queries" % queries | 159 print "Results for %i queries" % queries |
