diff tools/protein_analysis/rxlr_motifs.py @ 19:f3ecd80850e2 draft

v0.2.9 Python style improvements
author peterjc
date Wed, 01 Feb 2017 09:46:42 -0500
parents eb6ac44d4b8e
children a19b3ded8f33
line wrap: on
line diff
--- a/tools/protein_analysis/rxlr_motifs.py	Tue Sep 01 09:56:36 2015 -0400
+++ b/tools/protein_analysis/rxlr_motifs.py	Wed Feb 01 09:46:42 2017 -0500
@@ -40,14 +40,14 @@
 import sys
 import re
 import subprocess
-from seq_analysis_utils import sys_exit, fasta_iterator
+from seq_analysis_utils import fasta_iterator
 
 if "-v" in sys.argv:
     print("RXLR Motifs v0.0.10")
     sys.exit(0)
 
 if len(sys.argv) != 5:
-    sys_exit("Requires four arguments: protein FASTA filename, threads, model, and output filename")
+    sys.exit("Requires four arguments: protein FASTA filename, threads, model, and output filename")
 
 fasta_file, threads, model, tabular_file = sys.argv[1:]
 hmm_output_file = tabular_file + ".hmm.tmp"
@@ -86,8 +86,8 @@
     min_rxlr_start = 1
     max_rxlr_start = max_sp + max_sp_rxlr
 else:
-   sys_exit("Did not recognise the model name %r\n"
-            "Use Bhattacharjee2006, Win2007, or Whisson2007" % model)
+    sys.exit("Did not recognise the model name %r\n"
+             "Use Bhattacharjee2006, Win2007, or Whisson2007" % model)
 
 
 def get_hmmer_version(exe, required=None):
@@ -105,23 +105,23 @@
         return 3
     else:
         raise ValueError("Could not determine version of %s" % exe)
-    
+
 
-#Run hmmsearch for Whisson et al. (2007)
+# Run hmmsearch for Whisson et al. (2007)
 if model == "Whisson2007":
     hmm_file = os.path.join(os.path.split(sys.argv[0])[0],
                        "whisson_et_al_rxlr_eer_cropped.hmm")
     if not os.path.isfile(hmm_file):
-        sys_exit("Missing HMM file for Whisson et al. (2007)")
+        sys.exit("Missing HMM file for Whisson et al. (2007)")
     if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"):
-        sys_exit("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_search)
+        sys.exit("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_search)
 
     hmm_hits = set()
     valid_ids = set()
     for title, seq in fasta_iterator(fasta_file):
-        name = title.split(None,1)[0]
+        name = title.split(None, 1)[0]
         if name in valid_ids:
-            sys_exit("Duplicated identifier %r" % name)
+            sys.exit("Duplicated identifier %r" % name)
         else:
             valid_ids.add(name)
     if not valid_ids:
@@ -146,7 +146,7 @@
                   % (hmmer_search, hmm_file, fasta_file, hmm_output_file)
         return_code = os.system(cmd)
         if return_code:
-            sys_exit("Error %i from hmmsearch:\n%s" % (return_code, cmd), return_code)
+            sys.exit("Error %i from hmmsearch:\n%s" % (return_code, cmd))
 
         handle = open(hmm_output_file)
         for line in handle:
@@ -157,18 +157,18 @@
                 # Header
                 continue
             else:
-                name = line.split(None,1)[0]
-                #Should be a sequence name in the HMMER3 table output.
-                #Could be anything in the HMMER2 stdout.
+                name = line.split(None, 1)[0]
+                # Should be a sequence name in the HMMER3 table output.
+                # Could be anything in the HMMER2 stdout.
                 if name in valid_ids:
                     hmm_hits.add(name)
                 elif hmmer3:
-                    sys_exit("Unexpected identifer %r in hmmsearch output" % name)
+                    sys.exit("Unexpected identifer %r in hmmsearch output" % name)
         handle.close()
         # if hmmer3:
         #     print "HMMER3 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
         # else:
-        #     print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids))  
+        #     print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
         # print "%i/%i matched HMM" % (len(hmm_hits), len(valid_ids))
         os.remove(hmm_output_file)
     del valid_ids
@@ -181,8 +181,8 @@
 handle = open(signalp_input_file, "w")
 for title, seq in fasta_iterator(fasta_file):
     total += 1
-    name = title.split(None,1)[0]
-    match = re_rxlr.search(seq[min_rxlr_start-1:].upper())
+    name = title.split(None, 1)[0]
+    match = re_rxlr.search(seq[min_rxlr_start - 1:].upper())
     if match and min_rxlr_start - 1 + match.start() + 1 <= max_rxlr_start:
         # This is a potential RXLR, depending on the SignalP results.
         # Might as well truncate the sequence now, makes the temp file smaller
@@ -199,11 +199,11 @@
 # Run SignalP (using our wrapper script to get multi-core support etc)
 signalp_script = os.path.join(os.path.split(sys.argv[0])[0], "signalp3.py")
 if not os.path.isfile(signalp_script):
-    sys_exit("Error - missing signalp3.py script")
+    sys.exit("Error - missing signalp3.py script")
 cmd = "python %s euk %i %s %s %s" % (signalp_script, signalp_trunc, threads, signalp_input_file, signalp_output_file)
 return_code = os.system(cmd)
 if return_code:
-    sys_exit("Error %i from SignalP:\n%s" % (return_code, cmd))
+    sys.exit("Error %i from SignalP:\n%s" % (return_code, cmd))
 # print "SignalP done"
 
 
@@ -217,8 +217,8 @@
     assert line.startswith("#ID\t"), line
     for line in handle:
         parts = line.rstrip("\t").split("\t")
-        assert len(parts)==20, repr(line)
-        yield parts[0], float(parts[18]), int(parts[5])-1
+        assert len(parts) == 20, repr(line)
+        yield parts[0], float(parts[18]), int(parts[5]) - 1
     handle.close()
 
 
@@ -231,12 +231,12 @@
 for title, seq in fasta_iterator(fasta_file):
     total += 1
     rxlr = "N"
-    name = title.split(None,1)[0]
-    match = re_rxlr.search(seq[min_rxlr_start-1:].upper())
+    name = title.split(None, 1)[0]
+    match = re_rxlr.search(seq[min_rxlr_start - 1:].upper())
     if match and min_rxlr_start - 1 + match.start() + 1 <= max_rxlr_start:
         del match
         # This was the criteria for calling SignalP,
-        #so it will be in the SignalP results.
+        # so it will be in the SignalP results.
         sp_id, sp_hmm_score, sp_nn_len = signalp_results.next()
         assert name == sp_id, "%s vs %s" % (name, sp_id)
         if sp_hmm_score >= min_signalp_hmm and min_sp <= sp_nn_len <= max_sp: