Mercurial > repos > peterjc > tmhmm_and_signalp
comparison tools/protein_analysis/rxlr_motifs.py @ 20:a19b3ded8f33 draft
v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
author | peterjc |
---|---|
date | Thu, 21 Sep 2017 11:35:20 -0400 |
parents | f3ecd80850e2 |
children | 238eae32483c |
comparison
equal
deleted
inserted
replaced
19:f3ecd80850e2 | 20:a19b3ded8f33 |
---|---|
29 the predicted cleavage site, as this is expected to be more accurate. | 29 the predicted cleavage site, as this is expected to be more accurate. |
30 Also note that the HMM score values have changed from v2.0 to v3.0. | 30 Also note that the HMM score values have changed from v2.0 to v3.0. |
31 Whisson et al. (2007) used SignalP v3.0 anyway. | 31 Whisson et al. (2007) used SignalP v3.0 anyway. |
32 | 32 |
33 Whisson et al. (2007) used HMMER 2.3.2, and although their HMM model | 33 Whisson et al. (2007) used HMMER 2.3.2, and although their HMM model |
34 can still be used with hmmsearch from HMMER 3 this this does give | 34 can still be used with hmmsearch from HMMER 3, sadly this does give |
35 slightly different results. We expect the hmmsearch from HMMER 2.3.2 | 35 slightly different results. We expect the hmmsearch from HMMER 2.3.2 |
36 (the last stable release of HMMER 2) to be present on the path under | 36 (the last stable release of HMMER 2) to be present on the path under |
37 the name hmmsearch2 (allowing it to co-exist with HMMER 3). | 37 the name hmmsearch2 (allowing it to co-exist with HMMER 3). |
38 | |
39 If using Conda, you should therefore install the special "hmmer2" | |
40 package from BioConda which provides "hmmsearch2" etc:: | |
41 | |
42 conda install -c bioconda hmmer2 | |
43 | |
44 See https://bioconda.github.io/recipes/hmmer2/README.html and | |
45 https://anaconda.org/bioconda/hmmer2 | |
38 """ | 46 """ |
47 | |
48 from __future__ import print_function | |
49 | |
39 import os | 50 import os |
40 import sys | |
41 import re | 51 import re |
42 import subprocess | 52 import subprocess |
53 import sys | |
54 | |
43 from seq_analysis_utils import fasta_iterator | 55 from seq_analysis_utils import fasta_iterator |
44 | 56 |
45 if "-v" in sys.argv: | 57 if "-v" in sys.argv: |
46 print("RXLR Motifs v0.0.10") | 58 print("RXLR Motifs v0.0.14") |
47 sys.exit(0) | 59 sys.exit(0) |
48 | 60 |
49 if len(sys.argv) != 5: | 61 if len(sys.argv) != 5: |
50 sys.exit("Requires four arguments: protein FASTA filename, threads, model, and output filename") | 62 sys.exit("Requires four arguments: protein FASTA filename, threads, model, and output filename") |
51 | 63 |
89 sys.exit("Did not recognise the model name %r\n" | 101 sys.exit("Did not recognise the model name %r\n" |
90 "Use Bhattacharjee2006, Win2007, or Whisson2007" % model) | 102 "Use Bhattacharjee2006, Win2007, or Whisson2007" % model) |
91 | 103 |
92 | 104 |
93 def get_hmmer_version(exe, required=None): | 105 def get_hmmer_version(exe, required=None): |
94 cmd = "%s -h" % exe | |
95 try: | 106 try: |
96 child = subprocess.Popen([exe, "-h"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 107 child = subprocess.Popen([exe, "-h"], |
108 universal_newlines=True, | |
109 stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
97 except OSError: | 110 except OSError: |
98 raise ValueError("Could not run %s" % exe) | 111 raise ValueError("Could not run %s" % exe) |
99 stdout, stderr = child.communicate() | 112 stdout, stderr = child.communicate() |
100 if required: | 113 if required: |
101 return required in stdout | 114 return required in stdout |
108 | 121 |
109 | 122 |
110 # Run hmmsearch for Whisson et al. (2007) | 123 # Run hmmsearch for Whisson et al. (2007) |
111 if model == "Whisson2007": | 124 if model == "Whisson2007": |
112 hmm_file = os.path.join(os.path.split(sys.argv[0])[0], | 125 hmm_file = os.path.join(os.path.split(sys.argv[0])[0], |
113 "whisson_et_al_rxlr_eer_cropped.hmm") | 126 "whisson_et_al_rxlr_eer_cropped.hmm") |
114 if not os.path.isfile(hmm_file): | 127 if not os.path.isfile(hmm_file): |
115 sys.exit("Missing HMM file for Whisson et al. (2007)") | 128 sys.exit("Missing HMM file for Whisson et al. (2007)") |
116 if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"): | 129 if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"): |
117 sys.exit("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_search) | 130 sys.exit("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_search) |
118 | 131 |
273 # Cleanup | 286 # Cleanup |
274 os.remove(signalp_input_file) | 287 os.remove(signalp_input_file) |
275 os.remove(signalp_output_file) | 288 os.remove(signalp_output_file) |
276 | 289 |
277 # Short summary to stdout for Galaxy's info display | 290 # Short summary to stdout for Galaxy's info display |
278 print "%s for %i sequences:" % (model, total) | 291 print("%s for %i sequences:" % (model, total)) |
279 print ", ".join("%s = %i" % kv for kv in sorted(tally.iteritems())) | 292 print(", ".join("%s = %i" % kv for kv in sorted(tally.iteritems()))) |