Mercurial > repos > peterjc > tmhmm_and_signalp
comparison tools/protein_analysis/rxlr_motifs.py @ 20:a19b3ded8f33 draft
v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
| author | peterjc |
|---|---|
| date | Thu, 21 Sep 2017 11:35:20 -0400 |
| parents | f3ecd80850e2 |
| children | 238eae32483c |
comparison
equal
deleted
inserted
replaced
| 19:f3ecd80850e2 | 20:a19b3ded8f33 |
|---|---|
| 29 the predicted cleavage site, as this is expected to be more accurate. | 29 the predicted cleavage site, as this is expected to be more accurate. |
| 30 Also note that the HMM score values have changed from v2.0 to v3.0. | 30 Also note that the HMM score values have changed from v2.0 to v3.0. |
| 31 Whisson et al. (2007) used SignalP v3.0 anyway. | 31 Whisson et al. (2007) used SignalP v3.0 anyway. |
| 32 | 32 |
| 33 Whisson et al. (2007) used HMMER 2.3.2, and although their HMM model | 33 Whisson et al. (2007) used HMMER 2.3.2, and although their HMM model |
| 34 can still be used with hmmsearch from HMMER 3 this this does give | 34 can still be used with hmmsearch from HMMER 3, sadly this does give |
| 35 slightly different results. We expect the hmmsearch from HMMER 2.3.2 | 35 slightly different results. We expect the hmmsearch from HMMER 2.3.2 |
| 36 (the last stable release of HMMER 2) to be present on the path under | 36 (the last stable release of HMMER 2) to be present on the path under |
| 37 the name hmmsearch2 (allowing it to co-exist with HMMER 3). | 37 the name hmmsearch2 (allowing it to co-exist with HMMER 3). |
| 38 | |
| 39 If using Conda, you should therefore install the special "hmmer2" | |
| 40 package from BioConda which provides "hmmsearch2" etc:: | |
| 41 | |
| 42 conda install -c bioconda hmmer2 | |
| 43 | |
| 44 See https://bioconda.github.io/recipes/hmmer2/README.html and | |
| 45 https://anaconda.org/bioconda/hmmer2 | |
| 38 """ | 46 """ |
| 47 | |
| 48 from __future__ import print_function | |
| 49 | |
| 39 import os | 50 import os |
| 40 import sys | |
| 41 import re | 51 import re |
| 42 import subprocess | 52 import subprocess |
| 53 import sys | |
| 54 | |
| 43 from seq_analysis_utils import fasta_iterator | 55 from seq_analysis_utils import fasta_iterator |
| 44 | 56 |
| 45 if "-v" in sys.argv: | 57 if "-v" in sys.argv: |
| 46 print("RXLR Motifs v0.0.10") | 58 print("RXLR Motifs v0.0.14") |
| 47 sys.exit(0) | 59 sys.exit(0) |
| 48 | 60 |
| 49 if len(sys.argv) != 5: | 61 if len(sys.argv) != 5: |
| 50 sys.exit("Requires four arguments: protein FASTA filename, threads, model, and output filename") | 62 sys.exit("Requires four arguments: protein FASTA filename, threads, model, and output filename") |
| 51 | 63 |
| 89 sys.exit("Did not recognise the model name %r\n" | 101 sys.exit("Did not recognise the model name %r\n" |
| 90 "Use Bhattacharjee2006, Win2007, or Whisson2007" % model) | 102 "Use Bhattacharjee2006, Win2007, or Whisson2007" % model) |
| 91 | 103 |
| 92 | 104 |
| 93 def get_hmmer_version(exe, required=None): | 105 def get_hmmer_version(exe, required=None): |
| 94 cmd = "%s -h" % exe | |
| 95 try: | 106 try: |
| 96 child = subprocess.Popen([exe, "-h"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 107 child = subprocess.Popen([exe, "-h"], |
| 108 universal_newlines=True, | |
| 109 stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
| 97 except OSError: | 110 except OSError: |
| 98 raise ValueError("Could not run %s" % exe) | 111 raise ValueError("Could not run %s" % exe) |
| 99 stdout, stderr = child.communicate() | 112 stdout, stderr = child.communicate() |
| 100 if required: | 113 if required: |
| 101 return required in stdout | 114 return required in stdout |
| 108 | 121 |
| 109 | 122 |
| 110 # Run hmmsearch for Whisson et al. (2007) | 123 # Run hmmsearch for Whisson et al. (2007) |
| 111 if model == "Whisson2007": | 124 if model == "Whisson2007": |
| 112 hmm_file = os.path.join(os.path.split(sys.argv[0])[0], | 125 hmm_file = os.path.join(os.path.split(sys.argv[0])[0], |
| 113 "whisson_et_al_rxlr_eer_cropped.hmm") | 126 "whisson_et_al_rxlr_eer_cropped.hmm") |
| 114 if not os.path.isfile(hmm_file): | 127 if not os.path.isfile(hmm_file): |
| 115 sys.exit("Missing HMM file for Whisson et al. (2007)") | 128 sys.exit("Missing HMM file for Whisson et al. (2007)") |
| 116 if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"): | 129 if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"): |
| 117 sys.exit("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_search) | 130 sys.exit("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_search) |
| 118 | 131 |
| 273 # Cleanup | 286 # Cleanup |
| 274 os.remove(signalp_input_file) | 287 os.remove(signalp_input_file) |
| 275 os.remove(signalp_output_file) | 288 os.remove(signalp_output_file) |
| 276 | 289 |
| 277 # Short summary to stdout for Galaxy's info display | 290 # Short summary to stdout for Galaxy's info display |
| 278 print "%s for %i sequences:" % (model, total) | 291 print("%s for %i sequences:" % (model, total)) |
| 279 print ", ".join("%s = %i" % kv for kv in sorted(tally.iteritems())) | 292 print(", ".join("%s = %i" % kv for kv in sorted(tally.iteritems()))) |
