tmhmm_and_signalp: tools/protein_analysis/rxlr

comparison tools/protein_analysis/rxlr_motifs.py @ 16:7de64c8b258d draft

Uploaded v0.2.5, MIT licence, RST for README, citation information, development moved to GitHub

author	peterjc
date	Wed, 18 Sep 2013 06:16:58 -0400
parents	a290c6d4e658
children	eb6ac44d4b8e

comparison

equal deleted inserted replaced

-:6abd809cefdd
+:7de64c8b258d
 "whisson_et_al_rxlr_eer_cropped.hmm")
 if not os.path.isfile(hmm_file):
 stop_err("Missing HMM file for Whisson et al. (2007)")
 if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"):
 stop_err("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_searcher)
-#I've left the code to handle HMMER 3 in situ, in case
-#we revisit the choice to insist on HMMER 2.
-hmmer3 = (3 == get_hmmer_version(hmmer_search))
-#Using zero (or 5.6?) for bitscore threshold
-if hmmer3:
-#The HMMER3 table output is easy to parse
-#In HMMER3 can't use both -T and -E
-cmd = "%s -T 0 --tblout %s --noali %s %s > /dev/null" \
-% (hmmer_search, hmm_output_file, hmm_file, fasta_file)
-else:
-#For HMMER2 we are stuck with parsing stdout
-#Put 1e6 to effectively have no expectation threshold (otherwise
-#HMMER defaults to 10 and the calculated e-value depends on the
-#input FASTA file, and we can loose hits of interest).
-cmd = "%s -T 0 -E 1e6 %s %s > %s" \
-% (hmmer_search, hmm_file, fasta_file, hmm_output_file)
-return_code = os.system(cmd)
-if return_code:
-stop_err("Error %i from hmmsearch:\n%s" % (return_code, cmd))
 hmm_hits = set()
 valid_ids = set()
 for title, seq in fasta_iterator(fasta_file):
 name = title.split(None,1)[0]
 if name in valid_ids:
 stop_err("Duplicated identifier %r" % name)
 else:
 valid_ids.add(name)
-handle = open(hmm_output_file)
+if not valid_ids:
-for line in handle:
+#Special case, don't need to run HMMER if there are no sequences
-if not line.strip():
+pass
-#We expect blank lines in the HMMER2 stdout
+else:
-continue
+#I've left the code to handle HMMER 3 in situ, in case
-elif line.startswith("#"):
+#we revisit the choice to insist on HMMER 2.
-#Header
+hmmer3 = (3 == get_hmmer_version(hmmer_search))
-continue
+#Using zero (or 5.6?) for bitscore threshold
+if hmmer3:
+#The HMMER3 table output is easy to parse
+#In HMMER3 can't use both -T and -E
+cmd = "%s -T 0 --tblout %s --noali %s %s > /dev/null" \
+% (hmmer_search, hmm_output_file, hmm_file, fasta_file)
 else:
-name = line.split(None,1)[0]
+#For HMMER2 we are stuck with parsing stdout
-#Should be a sequence name in the HMMER3 table output.
+#Put 1e6 to effectively have no expectation threshold (otherwise
-#Could be anything in the HMMER2 stdout.
+#HMMER defaults to 10 and the calculated e-value depends on the
-if name in valid_ids:
+#input FASTA file, and we can loose hits of interest).
-hmm_hits.add(name)
+cmd = "%s -T 0 -E 1e6 %s %s > %s" \
-elif hmmer3:
+% (hmmer_search, hmm_file, fasta_file, hmm_output_file)
-stop_err("Unexpected identifer %r in hmmsearch output" % name)
+return_code = os.system(cmd)
-handle.close()
+if return_code:
-#if hmmer3:
+stop_err("Error %i from hmmsearch:\n%s" % (return_code, cmd), return_code)
-#    print "HMMER3 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
-#else:
+handle = open(hmm_output_file)
-#    print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
+for line in handle:
-#print "%i/%i matched HMM" % (len(hmm_hits), len(valid_ids))
+if not line.strip():
-os.remove(hmm_output_file)
+#We expect blank lines in the HMMER2 stdout
+continue
+elif line.startswith("#"):
+#Header
+continue
+else:
+name = line.split(None,1)[0]
+#Should be a sequence name in the HMMER3 table output.
+#Could be anything in the HMMER2 stdout.
+if name in valid_ids:
+hmm_hits.add(name)
+elif hmmer3:
+stop_err("Unexpected identifer %r in hmmsearch output" % name)
+handle.close()
+#if hmmer3:
+#    print "HMMER3 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
+#else:
+#    print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
+#print "%i/%i matched HMM" % (len(hmm_hits), len(valid_ids))
+os.remove(hmm_output_file)
 del valid_ids
 #Prepare short list of candidates containing RXLR to pass to SignalP
 assert min_rxlr_start > 0, "Min value one, since zero based counting"

Mercurial > repos > peterjc > tmhmm_and_signalp

comparison tools/protein_analysis/rxlr_motifs.py @ 16:7de64c8b258d draft