Mercurial > repos > peterjc > tmhmm_and_signalp
diff tools/protein_analysis/rxlr_motifs.py @ 19:f3ecd80850e2 draft
v0.2.9 Python style improvements
author | peterjc |
---|---|
date | Wed, 01 Feb 2017 09:46:42 -0500 |
parents | eb6ac44d4b8e |
children | a19b3ded8f33 |
line wrap: on
line diff
```diff
--- a/tools/protein_analysis/rxlr_motifs.py Tue Sep 01 09:56:36 2015 -0400
+++ b/tools/protein_analysis/rxlr_motifs.py Wed Feb 01 09:46:42 2017 -0500
@@ -40,14 +40,14 @@
 import sys
 import re
 import subprocess
-from seq_analysis_utils import sys_exit, fasta_iterator
+from seq_analysis_utils import fasta_iterator
 
 if "-v" in sys.argv:
     print("RXLR Motifs v0.0.10")
     sys.exit(0)
 
 if len(sys.argv) != 5:
-    sys_exit("Requires four arguments: protein FASTA filename, threads, model, and output filename")
+    sys.exit("Requires four arguments: protein FASTA filename, threads, model, and output filename")
 
 fasta_file, threads, model, tabular_file = sys.argv[1:]
 hmm_output_file = tabular_file + ".hmm.tmp"
@@ -86,8 +86,8 @@
     min_rxlr_start = 1
     max_rxlr_start = max_sp + max_sp_rxlr
 else:
-    sys_exit("Did not recognise the model name %r\n"
-             "Use Bhattacharjee2006, Win2007, or Whisson2007" % model)
+    sys.exit("Did not recognise the model name %r\n"
+             "Use Bhattacharjee2006, Win2007, or Whisson2007" % model)
 
 
 def get_hmmer_version(exe, required=None):
@@ -105,23 +105,23 @@
         return 3
     else:
         raise ValueError("Could not determine version of %s" % exe)
-
+
 
-#Run hmmsearch for Whisson et al. (2007)
+# Run hmmsearch for Whisson et al. (2007)
 if model == "Whisson2007":
     hmm_file = os.path.join(os.path.split(sys.argv[0])[0],
                             "whisson_et_al_rxlr_eer_cropped.hmm")
     if not os.path.isfile(hmm_file):
-        sys_exit("Missing HMM file for Whisson et al. (2007)")
+        sys.exit("Missing HMM file for Whisson et al. (2007)")
     if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"):
-        sys_exit("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_search)
+        sys.exit("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_search)
 
     hmm_hits = set()
     valid_ids = set()
     for title, seq in fasta_iterator(fasta_file):
-        name = title.split(None,1)[0]
+        name = title.split(None, 1)[0]
         if name in valid_ids:
-            sys_exit("Duplicated identifier %r" % name)
+            sys.exit("Duplicated identifier %r" % name)
         else:
             valid_ids.add(name)
     if not valid_ids:
@@ -146,7 +146,7 @@
               % (hmmer_search, hmm_file, fasta_file, hmm_output_file)
         return_code = os.system(cmd)
         if return_code:
-            sys_exit("Error %i from hmmsearch:\n%s" % (return_code, cmd), return_code)
+            sys.exit("Error %i from hmmsearch:\n%s" % (return_code, cmd), return_code)
 
         handle = open(hmm_output_file)
         for line in handle:
@@ -157,18 +157,18 @@
                 # Header
                 continue
             else:
-                name = line.split(None,1)[0]
-                #Should be a sequence name in the HMMER3 table output.
-                #Could be anything in the HMMER2 stdout.
+                name = line.split(None, 1)[0]
+                # Should be a sequence name in the HMMER3 table output.
+                # Could be anything in the HMMER2 stdout.
                 if name in valid_ids:
                     hmm_hits.add(name)
                 elif hmmer3:
-                    sys_exit("Unexpected identifer %r in hmmsearch output" % name)
+                    sys.exit("Unexpected identifer %r in hmmsearch output" % name)
         handle.close()
         # if hmmer3:
         #     print "HMMER3 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
         # else:
-        #     print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
+        #     print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
         # print "%i/%i matched HMM" % (len(hmm_hits), len(valid_ids))
         os.remove(hmm_output_file)
     del valid_ids
@@ -181,8 +181,8 @@
 handle = open(signalp_input_file, "w")
 for title, seq in fasta_iterator(fasta_file):
     total += 1
-    name = title.split(None,1)[0]
-    match = re_rxlr.search(seq[min_rxlr_start-1:].upper())
+    name = title.split(None, 1)[0]
+    match = re_rxlr.search(seq[min_rxlr_start - 1:].upper())
     if match and min_rxlr_start - 1 + match.start() + 1 <= max_rxlr_start:
         # This is a potential RXLR, depending on the SignalP results.
         # Might as well truncate the sequence now, makes the temp file smaller
@@ -199,11 +199,11 @@
 # Run SignalP (using our wrapper script to get multi-core support etc)
 signalp_script = os.path.join(os.path.split(sys.argv[0])[0], "signalp3.py")
 if not os.path.isfile(signalp_script):
-    sys_exit("Error - missing signalp3.py script")
+    sys.exit("Error - missing signalp3.py script")
 cmd = "python %s euk %i %s %s %s" % (signalp_script, signalp_trunc, threads, signalp_input_file, signalp_output_file)
 return_code = os.system(cmd)
 if return_code:
-    sys_exit("Error %i from SignalP:\n%s" % (return_code, cmd))
+    sys.exit("Error %i from SignalP:\n%s" % (return_code, cmd))
 # print "SignalP done"
 
 
@@ -217,8 +217,8 @@
     assert line.startswith("#ID\t"), line
     for line in handle:
         parts = line.rstrip("\t").split("\t")
-        assert len(parts)==20, repr(line)
-        yield parts[0], float(parts[18]), int(parts[5])-1
+        assert len(parts) == 20, repr(line)
+        yield parts[0], float(parts[18]), int(parts[5]) - 1
     handle.close()
 
 
@@ -231,12 +231,12 @@
 for title, seq in fasta_iterator(fasta_file):
     total += 1
     rxlr = "N"
-    name = title.split(None,1)[0]
-    match = re_rxlr.search(seq[min_rxlr_start-1:].upper())
+    name = title.split(None, 1)[0]
+    match = re_rxlr.search(seq[min_rxlr_start - 1:].upper())
     if match and min_rxlr_start - 1 + match.start() + 1 <= max_rxlr_start:
         del match
         # This was the criteria for calling SignalP,
-        #so it will be in the SignalP results.
+        # so it will be in the SignalP results.
         sp_id, sp_hmm_score, sp_nn_len = signalp_results.next()
         assert name == sp_id, "%s vs %s" % (name, sp_id)
         if sp_hmm_score >= min_signalp_hmm and min_sp <= sp_nn_len <= max_sp:
```