comparison tools/protein_analysis/promoter2.py @ 18:eb6ac44d4b8e draft

Suite v0.2.8, record Promoter 2 version + misc internal updates
author peterjc
date Tue, 01 Sep 2015 09:56:36 -0400
parents e52220a9ddad
children f3ecd80850e2
comparison
equal deleted inserted replaced
17:e6cc27d182a8 18:eb6ac44d4b8e
28 """ 28 """
29 import sys 29 import sys
30 import os 30 import os
31 import commands 31 import commands
32 import tempfile 32 import tempfile
33 from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count 33 from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count
34 34
35 FASTA_CHUNK = 500 35 FASTA_CHUNK = 500
36 36
37 if "-v" in sys.argv or "--version" in sys.argv:
38 sys.exit(os.system("promoter -V"))
39
37 if len(sys.argv) != 4: 40 if len(sys.argv) != 4:
38 stop_err("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. " 41 sys_exit("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. "
39 "Got %i arguments." % (len(sys.argv)-1)) 42 "Got %i arguments." % (len(sys.argv)-1))
40 43
41 num_threads = thread_count(sys.argv[3],default=4) 44 num_threads = thread_count(sys.argv[3],default=4)
42 fasta_file = os.path.abspath(sys.argv[2]) 45 fasta_file = os.path.abspath(sys.argv[2])
43 tabular_file = os.path.abspath(sys.argv[3]) 46 tabular_file = os.path.abspath(sys.argv[3])
46 49
47 def get_path_and_binary(): 50 def get_path_and_binary():
48 platform = commands.getoutput("uname") #e.g. Linux 51 platform = commands.getoutput("uname") #e.g. Linux
49 shell_script = commands.getoutput("which promoter") 52 shell_script = commands.getoutput("which promoter")
50 if not os.path.isfile(shell_script): 53 if not os.path.isfile(shell_script):
51 stop_err("ERROR: Missing promoter executable shell script") 54 sys_exit("ERROR: Missing promoter executable shell script")
52 path = None 55 path = None
53 for line in open(shell_script): 56 for line in open(shell_script):
54 if line.startswith("setenv"): #could then be tab or space! 57 if line.startswith("setenv"): #could then be tab or space!
55 parts = line.rstrip().split(None, 2) 58 parts = line.rstrip().split(None, 2)
56 if parts[0] == "setenv" and parts[1] == "PROM": 59 if parts[0] == "setenv" and parts[1] == "PROM":
57 path = parts[2] 60 path = parts[2]
58 if not path: 61 if not path:
59 stop_err("ERROR: Could not find promoter path (PROM) in %r" % shell_script) 62 sys_exit("ERROR: Could not find promoter path (PROM) in %r" % shell_script)
60 if not os.path.isdir(path): 63 if not os.path.isdir(path):
61 stop_error("ERROR: %r is not a directory" % path) 64 sys_exit("ERROR: %r is not a directory" % path)
62 bin = "%s/bin/promoter_%s" % (path, platform) 65 bin = "%s/bin/promoter_%s" % (path, platform)
63 if not os.path.isfile(bin): 66 if not os.path.isfile(bin):
64 stop_err("ERROR: Missing promoter binary %r" % bin) 67 sys_exit("ERROR: Missing promoter binary %r" % bin)
65 return path, bin 68 return path, bin
66 69
67 def make_tabular(raw_handle, out_handle): 70 def make_tabular(raw_handle, out_handle):
68 """Parse text output into tabular, return query count.""" 71 """Parse text output into tabular, return query count."""
69 identifier = None 72 identifier = None
84 try: 87 try:
85 position, score, likelihood = line.strip().split(None,2) 88 position, score, likelihood = line.strip().split(None,2)
86 except ValueError: 89 except ValueError:
87 print "WARNING: Problem with line: %r" % line 90 print "WARNING: Problem with line: %r" % line
88 continue 91 continue
89 #stop_err("ERROR: Problem with line: %r" % line) 92 #sys_exit("ERROR: Problem with line: %r" % line)
90 if likelihood not in ["ignored", 93 if likelihood not in ["ignored",
91 "Marginal prediction", 94 "Marginal prediction",
92 "Medium likely prediction", 95 "Medium likely prediction",
93 "Highly likely prediction"]: 96 "Highly likely prediction"]:
94 stop_err("ERROR: Problem with line: %r" % line) 97 sys_exit("ERROR: Problem with line: %r" % line)
95 out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood)) 98 out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood))
96 return queries 99 return queries
97 100
98 working_dir, bin = get_path_and_binary() 101 working_dir, bin = get_path_and_binary()
99 102
100 if not os.path.isfile(fasta_file): 103 if not os.path.isfile(fasta_file):
101 stop_err("ERROR: Missing input FASTA file %r" % fasta_file) 104 sys_exit("ERROR: Missing input FASTA file %r" % fasta_file)
102 105
103 #Note that if the input FASTA file contains no sequences, 106 #Note that if the input FASTA file contains no sequences,
104 #split_fasta returns an empty list (i.e. zero temp files). 107 #split_fasta returns an empty list (i.e. zero temp files).
105 #We deliberately omit the FASTA descriptions to avoid a 108 #We deliberately omit the FASTA descriptions to avoid a
106 #bug in promoter2 with descriptions over 200 characters. 109 #bug in promoter2 with descriptions over 200 characters.
131 try: 134 try:
132 output = open(temp).readline() 135 output = open(temp).readline()
133 except IOError: 136 except IOError:
134 output = "" 137 output = ""
135 clean_up(fasta_files + temp_files) 138 clean_up(fasta_files + temp_files)
136 stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), 139 sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
137 error_level) 140 error_level)
138 141
139 del results 142 del results
140 del jobs 143 del jobs
141 144
146 data_handle = open(temp) 149 data_handle = open(temp)
147 count = make_tabular(data_handle, out_handle) 150 count = make_tabular(data_handle, out_handle)
148 data_handle.close() 151 data_handle.close()
149 if not count: 152 if not count:
150 clean_up(fasta_files + temp_files) 153 clean_up(fasta_files + temp_files)
151 stop_err("No output from promoter2") 154 sys_exit("No output from promoter2")
152 queries += count 155 queries += count
153 out_handle.close() 156 out_handle.close()
154 157
155 clean_up(fasta_files + temp_files) 158 clean_up(fasta_files + temp_files)
156 print "Results for %i queries" % queries 159 print "Results for %i queries" % queries