Mercurial > repos > peterjc > tmhmm_and_signalp
comparison tools/protein_analysis/promoter2.py @ 20:a19b3ded8f33 draft
v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
author | peterjc |
---|---|
date | Thu, 21 Sep 2017 11:35:20 -0400 |
parents | f3ecd80850e2 |
children | 238eae32483c |
comparison
equal
deleted
inserted
replaced
19:f3ecd80850e2 | 20:a19b3ded8f33 |
---|---|
16 tab separated table. | 16 tab separated table. |
17 | 17 |
18 Additionally, in order to take advantage of multiple cores the input FASTA | 18 Additionally, in order to take advantage of multiple cores the input FASTA |
19 file is broken into chunks and multiple copies of promoter run at once. | 19 file is broken into chunks and multiple copies of promoter run at once. |
20 This can be used in combination with the job-splitting available in Galaxy. | 20 This can be used in combination with the job-splitting available in Galaxy. |
21 | |
22 Note that rewriting the FASTA input file allows us to avoid a bug in | 21 Note that rewriting the FASTA input file allows us to avoid a bug in |
23 promoter 2 with long descriptions in the FASTA header line (over 200 | 22 promoter 2 with long descriptions in the FASTA header line (over 200 |
24 characters) which produces stray fragments of the description in the | 23 characters) which produces stray fragments of the description in the |
25 output file, making parsing non-trivial. | 24 output file, making parsing non-trivial. |
26 | 25 |
27 TODO - Automatically extract the sequence containing a promoter prediction? | 26 TODO - Automatically extract the sequence containing a promoter prediction? |
28 """ | 27 """ |
28 | |
29 from __future__ import print_function | |
30 | |
31 import commands | |
32 import os | |
29 import sys | 33 import sys |
30 import os | |
31 import commands | |
32 import tempfile | 34 import tempfile |
33 from seq_analysis_utils import split_fasta, run_jobs, thread_count | 35 |
36 from seq_analysis_utils import run_jobs, split_fasta, thread_count | |
34 | 37 |
35 FASTA_CHUNK = 500 | 38 FASTA_CHUNK = 500 |
36 | 39 |
37 if "-v" in sys.argv or "--version" in sys.argv: | 40 if "-v" in sys.argv or "--version" in sys.argv: |
38 sys.exit(os.system("promoter -V")) | 41 sys.exit(os.system("promoter -V")) |
47 | 50 |
48 tmp_dir = tempfile.mkdtemp() | 51 tmp_dir = tempfile.mkdtemp() |
49 | 52 |
50 | 53 |
51 def get_path_and_binary(): | 54 def get_path_and_binary(): |
55 """Determine path and binary names for promoter tool.""" | |
52 platform = commands.getoutput("uname") # e.g. Linux | 56 platform = commands.getoutput("uname") # e.g. Linux |
53 shell_script = commands.getoutput("which promoter") | 57 shell_script = commands.getoutput("which promoter") |
54 if not os.path.isfile(shell_script): | 58 if not os.path.isfile(shell_script): |
55 sys.exit("ERROR: Missing promoter executable shell script") | 59 sys.exit("ERROR: Missing promoter executable shell script") |
56 path = None | 60 path = None |
72 def make_tabular(raw_handle, out_handle): | 76 def make_tabular(raw_handle, out_handle): |
73 """Parse text output into tabular, return query count.""" | 77 """Parse text output into tabular, return query count.""" |
74 identifier = None | 78 identifier = None |
75 queries = 0 | 79 queries = 0 |
76 for line in raw_handle: | 80 for line in raw_handle: |
77 # print repr(line) | 81 # print(repr(line)) |
78 if not line.strip() or line == "Promoter prediction:\n": | 82 if not line.strip() or line == "Promoter prediction:\n": |
79 pass | 83 pass |
80 elif line[0] != " ": | 84 elif line[0] != " ": |
81 identifier = line.strip().replace("\t", " ").split(None, 1)[0] | 85 identifier = line.strip().replace("\t", " ").split(None, 1)[0] |
82 queries += 1 | 86 queries += 1 |
87 assert identifier | 91 assert identifier |
88 else: | 92 else: |
89 try: | 93 try: |
90 position, score, likelihood = line.strip().split(None, 2) | 94 position, score, likelihood = line.strip().split(None, 2) |
91 except ValueError: | 95 except ValueError: |
92 print "WARNING: Problem with line: %r" % line | 96 print("WARNING: Problem with line: %r" % line) |
93 continue | 97 continue |
94 # sys.exit("ERROR: Problem with line: %r" % line) | 98 # sys.exit("ERROR: Problem with line: %r" % line) |
95 if likelihood not in ["ignored", | 99 if likelihood not in ["ignored", |
96 "Marginal prediction", | 100 "Marginal prediction", |
97 "Medium likely prediction", | 101 "Medium likely prediction", |
98 "Highly likely prediction"]: | 102 "Highly likely prediction"]: |
99 sys.exit("ERROR: Problem with line: %r" % line) | 103 sys.exit("ERROR: Problem with line: %r" % line) |
100 out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood)) | 104 out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood)) |
101 return queries | 105 return queries |
106 | |
102 | 107 |
103 working_dir, bin = get_path_and_binary() | 108 working_dir, bin = get_path_and_binary() |
104 | 109 |
105 if not os.path.isfile(fasta_file): | 110 if not os.path.isfile(fasta_file): |
106 sys.exit("ERROR: Missing input FASTA file %r" % fasta_file) | 111 sys.exit("ERROR: Missing input FASTA file %r" % fasta_file) |
122 try: | 127 try: |
123 os.rmdir(tmp_dir) | 128 os.rmdir(tmp_dir) |
124 except Exception: | 129 except Exception: |
125 pass | 130 pass |
126 | 131 |
132 | |
127 if len(jobs) > 1 and num_threads > 1: | 133 if len(jobs) > 1 and num_threads > 1: |
128 # A small "info" message for Galaxy to show the user. | 134 # A small "info" message for Galaxy to show the user. |
129 print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) | 135 print("Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs))) |
130 cur_dir = os.path.abspath(os.curdir) | 136 cur_dir = os.path.abspath(os.curdir) |
131 os.chdir(working_dir) | 137 os.chdir(working_dir) |
132 results = run_jobs(jobs, num_threads) | 138 results = run_jobs(jobs, num_threads) |
133 os.chdir(cur_dir) | 139 os.chdir(cur_dir) |
134 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): | 140 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): |
157 sys.exit("No output from promoter2") | 163 sys.exit("No output from promoter2") |
158 queries += count | 164 queries += count |
159 out_handle.close() | 165 out_handle.close() |
160 | 166 |
161 clean_up(fasta_files + temp_files) | 167 clean_up(fasta_files + temp_files) |
162 print "Results for %i queries" % queries | 168 print("Results for %i queries" % queries) |