changeset 18:eb6ac44d4b8e draft

Suite v0.2.8, record Promoter 2 verion + misc internal updates
author peterjc
date Tue, 01 Sep 2015 09:56:36 -0400
parents e6cc27d182a8
children f3ecd80850e2
files tools/protein_analysis/README.rst tools/protein_analysis/promoter2.py tools/protein_analysis/promoter2.xml tools/protein_analysis/psortb.py tools/protein_analysis/psortb.xml tools/protein_analysis/rxlr_motifs.py tools/protein_analysis/rxlr_motifs.xml tools/protein_analysis/seq_analysis_utils.py tools/protein_analysis/signalp3.py tools/protein_analysis/signalp3.xml tools/protein_analysis/suite_config.xml tools/protein_analysis/tmhmm2.py tools/protein_analysis/tmhmm2.xml tools/protein_analysis/wolf_psort.py tools/protein_analysis/wolf_psort.xml
diffstat 15 files changed, 295 insertions(+), 223 deletions(-) [+]
line wrap: on
line diff
--- a/tools/protein_analysis/README.rst	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/README.rst	Tue Sep 01 09:56:36 2015 -0400
@@ -14,7 +14,7 @@
 To use these Galaxy wrappers you must first install the command line tools.
 At the time of writing they are all free for academic use, or open source.
 
-These wrappers are copyright 2010-2013 by Peter Cock, James Hutton Institute
+These wrappers are copyright 2010-2015 by Peter Cock, James Hutton Institute
 (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
 Contributions/revisions copyright 2011 Konrad Paszkiewicz. All rights reserved.
 See the included LICENCE file for details (MIT open source licence).
@@ -174,6 +174,10 @@
 v0.2.6  - Use the new ``$GALAXY_SLOTS`` environment variable for thread count.
         - Updated the ``suite_config.xml`` file (overdue).
         - Tool definition now embeds citation information.
+v0.2.7  - Style cleanup in Python scripts.
+v0.2.8  - Reorder XML elements (internal change only).
+        - Planemo for Tool Shed upload (``.shed.yml``, internal change only).
+        - Record version of Promoter 2 via ``<version_command>``.
 ======= ======================================================================
 
 
@@ -187,10 +191,61 @@
 Development has now moved to a dedicated GitHub repository:
 https://github.com/peterjc/pico_galaxy/tree/master/tools
 
-For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use
-the following command from the Galaxy root folder::
+
+For pushing a release to the test or main "Galaxy Tool Shed", use the following
+Planemo commands (which requires you have set your Tool Shed access details in
+``~/.planemo.yml`` and that you have access rights on the Tool Shed)::
+
+    $ planemo shed_update -t testtoolshed --check_diff ~/repositories/pico_galaxy/tools/protein_analysis/
+    ...
+
+or::
+
+    $ planemo shed_update -t toolshed --check_diff ~/repositories/pico_galaxy/tools/protein_analysis/
+    ...
+
+To just build and check the tar ball, use::
 
-    $ ./tools/protein_analysis/make_tmhmm_and_signalp.sh
+    $ planemo shed_upload --tar_only  ~/repositories/pico_galaxy/tools/protein_analysis/
+    ...
+    $ tar -tzf shed_upload.tar.gz 
+    test-data/Adenovirus.fasta
+    test-data/Adenovirus.promoter2.tabular
+    test-data/empty.fasta
+    test-data/empty_promoter2.tabular
+    test-data/empty_psortb_terse.tabular
+    test-data/empty_rxlr.Bhattacharjee2006.tabular
+    test-data/empty_rxlr.Whisson2007.tabular
+    test-data/empty_rxlr.Win2007.tabular
+    test-data/empty_signalp3.tabular
+    test-data/empty_tmhmm2.tabular
+    test-data/empty_wolf_psort.tabular
+    test-data/four_human_proteins.fasta
+    test-data/four_human_proteins.signalp3.tabular
+    test-data/four_human_proteins.tmhmm2.tabular
+    test-data/four_human_proteins.wolf_psort.tabular
+    test-data/k12_ten_proteins.fasta
+    test-data/k12_ten_proteins_psortb_p_terse.tabular
+    test-data/rxlr_win_et_al_2007.fasta
+    test-data/rxlr_win_et_al_2007.tabular
+    test-data/rxlr_win_et_al_2007_sp3.tabular
+    tools/protein_analysis/LICENSE.txt
+    tools/protein_analysis/README.rst
+    tools/protein_analysis/promoter2.py
+    tools/protein_analysis/promoter2.xml
+    tools/protein_analysis/psortb.py
+    tools/protein_analysis/psortb.xml
+    tools/protein_analysis/rxlr_motifs.py
+    tools/protein_analysis/rxlr_motifs.xml
+    tools/protein_analysis/seq_analysis_utils.py
+    tools/protein_analysis/signalp3.py
+    tools/protein_analysis/signalp3.xml
+    tools/protein_analysis/suite_config.xml
+    tools/protein_analysis/tmhmm2.py
+    tools/protein_analysis/tmhmm2.xml
+    tools/protein_analysis/whisson_et_al_rxlr_eer_cropped.hmm
+    tools/protein_analysis/wolf_psort.py
+    tools/protein_analysis/wolf_psort.xml
 
 This simplifies ensuring a consistent set of files is bundled each time,
 including all the relevant test files.
--- a/tools/protein_analysis/promoter2.py	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/promoter2.py	Tue Sep 01 09:56:36 2015 -0400
@@ -30,12 +30,15 @@
 import os
 import commands
 import tempfile
-from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count
+from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count
 
 FASTA_CHUNK = 500
 
+if "-v" in sys.argv or "--version" in sys.argv:
+    sys.exit(os.system("promoter -V"))
+
 if len(sys.argv) != 4:
-    stop_err("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. "
+    sys_exit("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. "
              "Got %i arguments." % (len(sys.argv)-1))
 
 num_threads = thread_count(sys.argv[3],default=4)
@@ -48,7 +51,7 @@
     platform = commands.getoutput("uname") #e.g. Linux
     shell_script = commands.getoutput("which promoter")
     if not os.path.isfile(shell_script):
-        stop_err("ERROR: Missing promoter executable shell script")
+        sys_exit("ERROR: Missing promoter executable shell script")
     path = None
     for line in open(shell_script):
         if line.startswith("setenv"): #could then be tab or space!
@@ -56,12 +59,12 @@
             if parts[0] == "setenv" and parts[1] == "PROM":
                 path = parts[2]
     if not path:
-        stop_err("ERROR: Could not find promoter path (PROM) in %r" % shell_script)
+        sys_exit("ERROR: Could not find promoter path (PROM) in %r" % shell_script)
     if not os.path.isdir(path):
-        stop_error("ERROR: %r is not a directory" % path)
+        sys_exit("ERROR: %r is not a directory" % path)
     bin = "%s/bin/promoter_%s" % (path, platform)
     if not os.path.isfile(bin):
-        stop_err("ERROR: Missing promoter binary %r" % bin)
+        sys_exit("ERROR: Missing promoter binary %r" % bin)
     return path, bin
 
 def make_tabular(raw_handle, out_handle):
@@ -86,19 +89,19 @@
             except ValueError:
                 print "WARNING: Problem with line: %r" % line
                 continue
-                #stop_err("ERROR: Problem with line: %r" % line)
+                #sys_exit("ERROR: Problem with line: %r" % line)
             if likelihood not in ["ignored",
                                   "Marginal prediction",
                                   "Medium likely prediction",
                                   "Highly likely prediction"]:
-                stop_err("ERROR: Problem with line: %r" % line)
+                sys_exit("ERROR: Problem with line: %r" % line)
             out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood))
     return queries
     
 working_dir, bin = get_path_and_binary()
 
 if not os.path.isfile(fasta_file):
-   stop_err("ERROR: Missing input FASTA file %r" % fasta_file)
+   sys_exit("ERROR: Missing input FASTA file %r" % fasta_file)
 
 #Note that if the input FASTA file contains no sequences,
 #split_fasta returns an empty list (i.e. zero temp files).
@@ -133,7 +136,7 @@
         except IOError:
             output = ""
         clean_up(fasta_files + temp_files)
-        stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
+        sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
                  error_level)
 
 del results
@@ -148,7 +151,7 @@
     data_handle.close()
     if not count:
         clean_up(fasta_files + temp_files)
-        stop_err("No output from promoter2")
+        sys_exit("No output from promoter2")
     queries += count
 out_handle.close()
 
--- a/tools/protein_analysis/promoter2.xml	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/promoter2.xml	Tue Sep 01 09:56:36 2015 -0400
@@ -1,27 +1,29 @@
-<tool id="promoter2" name="Promoter 2.0" version="0.0.8">
+<tool id="promoter2" name="Promoter 2.0" version="0.0.10">
     <description>Find eukaryotic PolII promoters in DNA sequences</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <!-- Using 2000 per chunk so 4 threads each doing 500 is ideal -->
     <parallelism method="basic" split_inputs="fasta_file" split_mode="to_size" split_size="2000" merge_outputs="tabular_file"></parallelism>
+    <requirements>
+        <requirement type="binary">promoter</requirement>
+        <requirement type="package">promoter</requirement>
+    </requirements>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+    <version_command interpreter="python">promoter2.py --version</version_command>
     <command interpreter="python">
         promoter2.py "\$GALAXY_SLOTS" "$fasta_file" "$tabular_file"
         ##If the environment variable isn't set, get "", and the python wrapper
         ##defaults to four threads.
     </command>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
     <inputs>
         <param name="fasta_file" type="data" format="fasta" label="FASTA file of DNA sequences"/> 
     </inputs>
     <outputs>
         <data name="tabular_file" format="tabular" label="Promoter2 on ${fasta_file.name}" />
     </outputs>
-    <requirements>
-        <requirement type="binary">promoter</requirement>
-    </requirements>
     <tests>
         <test>
             <param name="fasta_file" value="Adenovirus.fasta" ftype="fasta"/>
--- a/tools/protein_analysis/psortb.py	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/psortb.py	Tue Sep 01 09:56:36 2015 -0400
@@ -24,7 +24,7 @@
 import sys
 import os
 import tempfile
-from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count
+from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count
 
 FASTA_CHUNK = 500
 
@@ -33,7 +33,7 @@
     sys.exit(os.system("psort --version"))
 
 if len(sys.argv) != 8:
-    stop_err("Require 7 arguments, number of threads (int), type (e.g. archaea), "
+    sys_exit("Require 7 arguments, number of threads (int), type (e.g. archaea), "
              "output (e.g. terse/normal/long), cutoff, divergent, input protein "
              "FASTA file & output tabular file")
 
@@ -56,7 +56,7 @@
 if out_type == "terse":
     header = ['SeqID', 'Localization', 'Score']
 elif out_type == "normal":
-    stop_err("Normal output not implemented yet, sorry.")
+    sys_exit("Normal output not implemented yet, sorry.")
 elif out_type == "long":
     if org_type == "-n":
         #Gram negative bacteria
@@ -93,9 +93,9 @@
                   'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score',
                   'Secondary_Localization', 'PSortb_Version']
     else:
-        stop_err("Expected -n, -p or -a for the organism type, not %r" % org_type)
+        sys_exit("Expected -n, -p or -a for the organism type, not %r" % org_type)
 else:
-    stop_err("Expected terse, normal or long for the output type, not %r" % out_type)
+    sys_exit("Expected terse, normal or long for the output type, not %r" % out_type)
 
 tmp_dir = tempfile.mkdtemp()
 
@@ -149,7 +149,7 @@
         except IOError:
             output = ""
         clean_up(fasta_files + temp_files)
-        stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
+        sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
                  error_level)
 del results
 del jobs
@@ -163,7 +163,7 @@
     data_handle.close()
     if not count:
         clean_up(fasta_files + temp_files)
-        stop_err("No output from psortb")
+        sys_exit("No output from psortb")
 out_handle.close()
 print "%i records" % count
 
--- a/tools/protein_analysis/psortb.xml	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/psortb.xml	Tue Sep 01 09:56:36 2015 -0400
@@ -1,61 +1,62 @@
-<tool id="Psortb" name="psortb" version="0.0.5">
-  <description>Determines sub-cellular localisation of bacterial/archaeal protein sequences</description>
-  <!-- If job splitting is enabled, break up the query file into parts -->
-  <!-- Using 2000 chunks meaning 4 threads doing 500 each is ideal -->
-  <parallelism method="basic" split_inputs="fasta_file" split_mode="to_size" split_size="2000" merge_outputs="tabular_file"></parallelism>
-  <version_command interpreter="python">psortb.py --version</version_command>
-  <command interpreter="python">
-    psortb.py "\$GALAXY_SLOTS" "$type" "$long" "$cutoff" "$divergent" "$sequence" "$outfile"
-    ##If the environment variable isn't set, get "", and python wrapper
-    ##defaults to four threads.
-  </command>
-  <stdio>
-    <!-- Anything other than zero is an error -->
-    <exit_code range="1:" />
-    <exit_code range=":-1" />
-  </stdio>
-  <inputs>
-    <param format="fasta" name="sequence" type="data"
-           label="Input sequences for which to predict localisation (protein FASTA format)" />
-    <param name="type" type="select"
-           label="Organism type (N.B. all sequences in the above file must be of the same type)" >
-      <option value="-p">Gram positive bacteria</option>
-      <option value="-n">Gram negative bacteria</option>
-      <option value="-a">Archaea</option>
-    </param>
-    <param name="long" type="select" label="Output type">
-      <option value="terse">Short (terse, tabular with 3 columns)</option>
-      <!-- The normal output is text, not tabular - worth offering?
-      <option value="normal">Normal</option>
-      -->
-      <option value="long">Long (verbose, tabular with about 30 columns, depending on organism type)</option>
-    </param>
-    <param name="cutoff" size="10" type="float" optional="true" value=""
-           label="Sets a cutoff value for reported results (e.g. 7.5)"
-           help="Leave blank or use zero for no cutoff." />
-    <param name="divergent" size="10" type="float" optional="true" value=""
-           label="Sets a cutoff value for the multiple localization flag (e.g. 4.5)"
-           help="Leave blank or use zero for no cutoff." />
-  </inputs>
-  <outputs>
-    <data format="tabular" name="outfile" />
-  </outputs>
-  <requirements>
-    <requirement type="binary">psort</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="sequence" value="empty.fasta" ftype="fasta"/>
-      <param name="long" value="terse"/>
-      <output name="outfile" file="empty_psortb_terse.tabular" ftype="tabular"/>
-    </test>
-    <test>
-      <param name="sequence" value="k12_ten_proteins.fasta" ftype="fasta"/>
-      <param name="long" value="terse"/>
-      <output name="outfile" file="k12_ten_proteins_psortb_p_terse.tabular" ftype="tabular"/>
-    </test>
-  </tests>
-  <help>
+<tool id="Psortb" name="psortb" version="0.0.7">
+    <description>Determines sub-cellular localisation of bacterial/archaeal protein sequences</description>
+    <!-- If job splitting is enabled, break up the query file into parts -->
+    <!-- Using 2000 chunks meaning 4 threads doing 500 each is ideal -->
+    <parallelism method="basic" split_inputs="fasta_file" split_mode="to_size" split_size="2000" merge_outputs="tabular_file"></parallelism>
+    <requirements>
+        <requirement type="binary">psort</requirement>
+        <requirement type="package">psort</requirement>
+    </requirements>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+    <version_command interpreter="python">psortb.py --version</version_command>
+    <command interpreter="python">
+psortb.py "\$GALAXY_SLOTS" "$type" "$long" "$cutoff" "$divergent" "$sequence" "$outfile"
+##If the environment variable isn't set, get "", and python wrapper
+##defaults to four threads.
+    </command>
+    <inputs>
+        <param format="fasta" name="sequence" type="data"
+               label="Input sequences for which to predict localisation (protein FASTA format)" />
+        <param name="type" type="select"
+               label="Organism type (N.B. all sequences in the above file must be of the same type)" >
+            <option value="-p">Gram positive bacteria</option>
+            <option value="-n">Gram negative bacteria</option>
+            <option value="-a">Archaea</option>
+        </param>
+        <param name="long" type="select" label="Output type">
+            <option value="terse">Short (terse, tabular with 3 columns)</option>
+            <!-- The normal output is text, not tabular - worth offering?
+            <option value="normal">Normal</option>
+            -->
+            <option value="long">Long (verbose, tabular with about 30 columns, depending on organism type)</option>
+        </param>
+        <param name="cutoff" size="10" type="float" optional="true" value=""
+               label="Sets a cutoff value for reported results (e.g. 7.5)"
+               help="Leave blank or use zero for no cutoff." />
+        <param name="divergent" size="10" type="float" optional="true" value=""
+               label="Sets a cutoff value for the multiple localization flag (e.g. 4.5)"
+               help="Leave blank or use zero for no cutoff." />
+    </inputs>
+    <outputs>
+        <data format="tabular" name="outfile" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="sequence" value="empty.fasta" ftype="fasta"/>
+            <param name="long" value="terse"/>
+            <output name="outfile" file="empty_psortb_terse.tabular" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="sequence" value="k12_ten_proteins.fasta" ftype="fasta"/>
+            <param name="long" value="terse"/>
+            <output name="outfile" file="k12_ten_proteins_psortb_p_terse.tabular" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>
 
 **What it does**
 
@@ -99,9 +100,9 @@
 
 This wrapper is available to install into other Galaxy Instances via the Galaxy
 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/tmhmm_and_signalp
+    </help>
     <citations>
         <citation type="doi">10.7717/peerj.167</citation>
         <citation type="doi">10.1093/bioinformatics/btq249</citation>
     </citations>
-  </help>
 </tool>
--- a/tools/protein_analysis/rxlr_motifs.py	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/rxlr_motifs.py	Tue Sep 01 09:56:36 2015 -0400
@@ -40,10 +40,14 @@
 import sys
 import re
 import subprocess
-from seq_analysis_utils import stop_err, fasta_iterator
+from seq_analysis_utils import sys_exit, fasta_iterator
+
+if "-v" in sys.argv:
+    print("RXLR Motifs v0.0.10")
+    sys.exit(0)
 
 if len(sys.argv) != 5:
-   stop_err("Requires four arguments: protein FASTA filename, threads, model, and output filename")
+    sys_exit("Requires four arguments: protein FASTA filename, threads, model, and output filename")
 
 fasta_file, threads, model, tabular_file = sys.argv[1:]
 hmm_output_file = tabular_file + ".hmm.tmp"
@@ -53,36 +57,36 @@
 hmmer_search = "hmmsearch2"
 
 if model == "Bhattacharjee2006":
-   signalp_trunc = 70
-   re_rxlr = re.compile("R.LR")
-   min_sp = 10
-   max_sp = 40
-   max_sp_rxlr = 100
-   min_rxlr_start = 1
-   #Allow signal peptide to be at most 40aa, and want RXLR to be
-   #within 100aa, therefore for the prescreen the max start is 140:
-   max_rxlr_start = max_sp + max_sp_rxlr
+    signalp_trunc = 70
+    re_rxlr = re.compile("R.LR")
+    min_sp = 10
+    max_sp = 40
+    max_sp_rxlr = 100
+    min_rxlr_start = 1
+    # Allow signal peptide to be at most 40aa, and want RXLR to be
+    # within 100aa, therefore for the prescreen the max start is 140:
+    max_rxlr_start = max_sp + max_sp_rxlr
 elif model == "Win2007":
-   signalp_trunc = 70
-   re_rxlr = re.compile("R.LR")
-   min_sp = 10
-   max_sp = 40
-   min_rxlr_start = 30
-   max_rxlr_start = 60
-   #No explicit limit on separation of signal peptide clevage
-   #and RXLR, but shortest signal peptide is 10, and furthest
-   #away RXLR is 60, so effectively limit is 50.
-   max_sp_rxlr = max_rxlr_start - min_sp + 1
+    signalp_trunc = 70
+    re_rxlr = re.compile("R.LR")
+    min_sp = 10
+    max_sp = 40
+    min_rxlr_start = 30
+    max_rxlr_start = 60
+    # No explicit limit on separation of signal peptide clevage
+    # and RXLR, but shortest signal peptide is 10, and furthest
+    # away RXLR is 60, so effectively limit is 50.
+    max_sp_rxlr = max_rxlr_start - min_sp + 1
 elif model == "Whisson2007":
-   signalp_trunc = 0 #zero for no truncation
-   re_rxlr = re.compile("R.LR.{,40}[ED][ED][KR]")
-   min_sp = 10
-   max_sp = 40
-   max_sp_rxlr = 100
-   min_rxlr_start = 1
-   max_rxlr_start = max_sp + max_sp_rxlr
+    signalp_trunc = 0  # zero for no truncation
+    re_rxlr = re.compile("R.LR.{,40}[ED][ED][KR]")
+    min_sp = 10
+    max_sp = 40
+    max_sp_rxlr = 100
+    min_rxlr_start = 1
+    max_rxlr_start = max_sp + max_sp_rxlr
 else:
-   stop_err("Did not recognise the model name %r\n"
+   sys_exit("Did not recognise the model name %r\n"
             "Use Bhattacharjee2006, Win2007, or Whisson2007" % model)
 
 
@@ -108,49 +112,49 @@
     hmm_file = os.path.join(os.path.split(sys.argv[0])[0],
                        "whisson_et_al_rxlr_eer_cropped.hmm")
     if not os.path.isfile(hmm_file):
-        stop_err("Missing HMM file for Whisson et al. (2007)")
+        sys_exit("Missing HMM file for Whisson et al. (2007)")
     if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"):
-        stop_err("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_searcher)
+        sys_exit("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_search)
 
     hmm_hits = set()
     valid_ids = set()
     for title, seq in fasta_iterator(fasta_file):
         name = title.split(None,1)[0]
         if name in valid_ids:
-            stop_err("Duplicated identifier %r" % name)
+            sys_exit("Duplicated identifier %r" % name)
         else:
             valid_ids.add(name)
     if not valid_ids:
-        #Special case, don't need to run HMMER if there are no sequences
+        # Special case, don't need to run HMMER if there are no sequences
         pass
     else:
-        #I've left the code to handle HMMER 3 in situ, in case
-        #we revisit the choice to insist on HMMER 2.
+        # I've left the code to handle HMMER 3 in situ, in case
+        # we revisit the choice to insist on HMMER 2.
         hmmer3 = (3 == get_hmmer_version(hmmer_search))
-        #Using zero (or 5.6?) for bitscore threshold
+        # Using zero (or 5.6?) for bitscore threshold
         if hmmer3:
-            #The HMMER3 table output is easy to parse
-            #In HMMER3 can't use both -T and -E
+            # The HMMER3 table output is easy to parse
+            # In HMMER3 can't use both -T and -E
             cmd = "%s -T 0 --tblout %s --noali %s %s > /dev/null" \
                   % (hmmer_search, hmm_output_file, hmm_file, fasta_file)
         else:
-            #For HMMER2 we are stuck with parsing stdout
-            #Put 1e6 to effectively have no expectation threshold (otherwise
-            #HMMER defaults to 10 and the calculated e-value depends on the
-            #input FASTA file, and we can loose hits of interest).
+            # For HMMER2 we are stuck with parsing stdout
+            # Put 1e6 to effectively have no expectation threshold (otherwise
+            # HMMER defaults to 10 and the calculated e-value depends on the
+            # input FASTA file, and we can loose hits of interest).
             cmd = "%s -T 0 -E 1e6 %s %s > %s" \
                   % (hmmer_search, hmm_file, fasta_file, hmm_output_file)
         return_code = os.system(cmd)
         if return_code:
-            stop_err("Error %i from hmmsearch:\n%s" % (return_code, cmd), return_code)
+            sys_exit("Error %i from hmmsearch:\n%s" % (return_code, cmd), return_code)
 
         handle = open(hmm_output_file)
         for line in handle:
             if not line.strip():
-                #We expect blank lines in the HMMER2 stdout
+                # We expect blank lines in the HMMER2 stdout
                 continue
             elif line.startswith("#"):
-                #Header
+                # Header
                 continue
             else:
                 name = line.split(None,1)[0]
@@ -159,18 +163,18 @@
                 if name in valid_ids:
                     hmm_hits.add(name)
                 elif hmmer3:
-                    stop_err("Unexpected identifer %r in hmmsearch output" % name)
+                    sys_exit("Unexpected identifer %r in hmmsearch output" % name)
         handle.close()
-        #if hmmer3:
-        #    print "HMMER3 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
-        #else:
-        #    print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids))  
-        #print "%i/%i matched HMM" % (len(hmm_hits), len(valid_ids))
+        # if hmmer3:
+        #     print "HMMER3 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
+        # else:
+        #     print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids))  
+        # print "%i/%i matched HMM" % (len(hmm_hits), len(valid_ids))
         os.remove(hmm_output_file)
     del valid_ids
 
 
-#Prepare short list of candidates containing RXLR to pass to SignalP
+# Prepare short list of candidates containing RXLR to pass to SignalP
 assert min_rxlr_start > 0, "Min value one, since zero based counting"
 count = 0
 total = 0
@@ -180,27 +184,28 @@
     name = title.split(None,1)[0]
     match = re_rxlr.search(seq[min_rxlr_start-1:].upper())
     if match and min_rxlr_start - 1 + match.start() + 1 <= max_rxlr_start:
-        #This is a potential RXLR, depending on the SignalP results.
-        #Might as well truncate the sequence now, makes the temp file smaller
+        # This is a potential RXLR, depending on the SignalP results.
+        # Might as well truncate the sequence now, makes the temp file smaller
         if signalp_trunc:
             handle.write(">%s (truncated)\n%s\n" % (name, seq[:signalp_trunc]))
         else:
-            #Does it matter we don't line wrap?
+            # Does it matter we don't line wrap?
             handle.write(">%s\n%s\n" % (name, seq))
         count += 1
 handle.close()
-#print "Running SignalP on %i/%i potentials." % (count, total)
+# print "Running SignalP on %i/%i potentials." % (count, total)
 
 
-#Run SignalP (using our wrapper script to get multi-core support etc)
+# Run SignalP (using our wrapper script to get multi-core support etc)
 signalp_script = os.path.join(os.path.split(sys.argv[0])[0], "signalp3.py")
 if not os.path.isfile(signalp_script):
-    stop_err("Error - missing signalp3.py script")
+    sys_exit("Error - missing signalp3.py script")
 cmd = "python %s euk %i %s %s %s" % (signalp_script, signalp_trunc, threads, signalp_input_file, signalp_output_file)
 return_code = os.system(cmd)
 if return_code:
-    stop_err("Error %i from SignalP:\n%s" % (return_code, cmd))
-#print "SignalP done"
+    sys_exit("Error %i from SignalP:\n%s" % (return_code, cmd))
+# print "SignalP done"
+
 
 def parse_signalp(filename):
     """Parse SignalP output, yield tuples of ID, HMM_Sprob_score and NN predicted signal peptide length.
@@ -217,7 +222,7 @@
     handle.close()
 
 
-#Parse SignalP results and apply the strict RXLR criteria
+# Parse SignalP results and apply the strict RXLR criteria
 total = 0
 tally = dict()
 handle = open(tabular_file, "w")
@@ -230,26 +235,26 @@
     match = re_rxlr.search(seq[min_rxlr_start-1:].upper())
     if match and min_rxlr_start - 1 + match.start() + 1 <= max_rxlr_start:
         del match
-        #This was the criteria for calling SignalP,
+        # This was the criteria for calling SignalP,
         #so it will be in the SignalP results.
         sp_id, sp_hmm_score, sp_nn_len = signalp_results.next()
         assert name == sp_id, "%s vs %s" % (name, sp_id)
         if sp_hmm_score >= min_signalp_hmm and min_sp <= sp_nn_len <= max_sp:
             match = re_rxlr.search(seq[sp_nn_len:].upper())
-            if match and match.start() + 1 <= max_sp_rxlr: #1-based counting
+            if match and match.start() + 1 <= max_sp_rxlr:  # 1-based counting
                 rxlr_start = sp_nn_len + match.start() + 1
                 if min_rxlr_start <= rxlr_start <= max_rxlr_start:
                     rxlr = "Y"
     if model == "Whisson2007":
-        #Combine the signalp with regular expression heuristic and the HMM
+        # Combine the signalp with regular expression heuristic and the HMM
         if name in hmm_hits and rxlr == "N":
-            rxlr = "hmm" #HMM only
+            rxlr = "hmm"  # HMM only
         elif rxlr == "N":
-            rxlr = "neither" #Don't use N (no)
+            rxlr = "neither"  # Don't use N (no)
         elif name not in hmm_hits and rxlr == "Y":
-            rxlr = "re" #Heuristic only
-        #Now have a four way classifier: Y, hmm, re, neither
-        #and count is the number of Y results (both HMM and heuristic)
+            rxlr = "re"  # Heuristic only
+        # Now have a four way classifier: Y, hmm, re, neither
+        # and count is the number of Y results (both HMM and heuristic)
     handle.write("%s\t%s\n" % (name, rxlr))
     try:
         tally[rxlr] += 1
@@ -258,17 +263,17 @@
 handle.close()
 assert sum(tally.values()) == total
 
-#Check the iterator is finished
+# Check the iterator is finished
 try:
     signalp_results.next()
     assert False, "Unexpected data in SignalP output"
 except StopIteration:
     pass
 
-#Cleanup
+# Cleanup
 os.remove(signalp_input_file)
 os.remove(signalp_output_file)
 
-#Short summary to stdout for Galaxy's info display
+# Short summary to stdout for Galaxy's info display
 print "%s for %i sequences:" % (model, total)
 print ", ".join("%s = %i" % kv for kv in sorted(tally.iteritems()))
--- a/tools/protein_analysis/rxlr_motifs.xml	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/rxlr_motifs.xml	Tue Sep 01 09:56:36 2015 -0400
@@ -1,13 +1,22 @@
-<tool id="rxlr_motifs" name="RXLR Motifs" version="0.0.9">
+<tool id="rxlr_motifs" name="RXLR Motifs" version="0.0.11">
     <description>Find RXLR Effectors of Plant Pathogenic Oomycetes</description>
-    <command interpreter="python">
-      rxlr_motifs.py "$fasta_file" "\$GALAXY_SLOTS" $model "$tabular_file"
-    </command>
+    <requirements>
+        <!-- Need SignalP for all the models -->
+        <requirement type="binary">signalp</requirement>
+        <requirement type="package">signalp</requirement>
+        <!-- Need HMMER for Whisson et al. (2007) -->
+        <requirement type="binary">hmmsearch</requirement>
+        <requirement type="package">hmmsearch</requirement>
+    </requirements>
     <stdio>
         <!-- Anything other than zero is an error -->
         <exit_code range="1:" />
         <exit_code range=":-1" />
     </stdio>
+    <version_command interpreter="python">rxlr_motifs.py -v</version_command>
+    <command interpreter="python">
+      rxlr_motifs.py "$fasta_file" "\$GALAXY_SLOTS" $model "$tabular_file"
+    </command>
     <inputs>
         <param name="fasta_file" type="data" format="fasta" label="FASTA file of protein sequences" /> 
         <param name="model" type="select" label="Which RXLR model?">
@@ -19,12 +28,6 @@
     <outputs>
         <data name="tabular_file" format="tabular" label="$model.value_label" />
     </outputs>
-    <requirements>
-        <!-- Need SignalP for all the models -->
-        <requirement type="binary">signalp</requirement>
-        <!-- Need HMMER for Whisson et al. (2007) -->
-        <requirement type="binary">hmmsearch</requirement>
-    </requirements>
     <tests>
         <test>
             <param name="fasta_file" value="rxlr_win_et_al_2007.fasta" ftype="fasta" />
--- a/tools/protein_analysis/seq_analysis_utils.py	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/seq_analysis_utils.py	Tue Sep 01 09:56:36 2015 -0400
@@ -14,7 +14,7 @@
 
 __version__ = "0.0.1"
 
-def stop_err(msg, error_level=1):
+def sys_exit(msg, error_level=1):
     """Print error message to stdout and quit with given error level."""
     sys.stderr.write("%s\n" % msg)
     sys.exit(error_level)
@@ -57,7 +57,7 @@
     except:
         num = default
     if num < 1:
-        stop_err("Threads argument %r is not a positive integer" % command_line_arg)
+        sys_exit("Threads argument %r is not a positive integer" % command_line_arg)
     #Cap this with the pysical limit of the machine,
     try:
         num = min(num, cpu_count())
--- a/tools/protein_analysis/signalp3.py	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/signalp3.py	Tue Sep 01 09:56:36 2015 -0400
@@ -56,28 +56,28 @@
 import sys
 import os
 import tempfile
-from seq_analysis_utils import stop_err, split_fasta, fasta_iterator
+from seq_analysis_utils import sys_exit, split_fasta, fasta_iterator
 from seq_analysis_utils import run_jobs, thread_count
 
 FASTA_CHUNK = 500
 MAX_LEN = 6000 #Found by trial and error
 
 if len(sys.argv) not in  [6,8]:
-    stop_err("Require five (or 7) arguments, organism, truncate, threads, "
+    sys_exit("Require five (or 7) arguments, organism, truncate, threads, "
              "input protein FASTA file & output tabular file (plus "
              "optionally cut method and GFF3 output file). "
              "Got %i arguments." % (len(sys.argv)-1))
 
 organism = sys.argv[1]
 if organism not in ["euk", "gram+", "gram-"]:
-    stop_err("Organism argument %s is not one of euk, gram+ or gram-" % organism)
+    sys_exit("Organism argument %s is not one of euk, gram+ or gram-" % organism)
 
 try:
     truncate = int(sys.argv[2])
 except:
     truncate = 0
 if truncate < 0:
-    stop_err("Truncate argument %s is not a positive integer (or zero)" % sys.argv[2])
+    sys_exit("Truncate argument %s is not a positive integer (or zero)" % sys.argv[2])
 
 num_threads = thread_count(sys.argv[3], default=4)
 fasta_file = sys.argv[4]
@@ -86,7 +86,7 @@
 if len(sys.argv) == 8:
     cut_method = sys.argv[6]
     if cut_method not in ["NN_Cmax", "NN_Ymax", "NN_Smax", "HMM_Cmax"]:
-        stop_err("Invalid cut method %r" % cut_method)
+        sys_exit("Invalid cut method %r" % cut_method)
     gff3_file = sys.argv[7]
 else:
     cut_method = None
@@ -197,7 +197,7 @@
         output = "(no output)"
     if error_level or output.lower().startswith("error running"):
         clean_up(fasta_files + temp_files)
-        stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
+        sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
                  error_level)
 del results
 
--- a/tools/protein_analysis/signalp3.xml	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/signalp3.xml	Tue Sep 01 09:56:36 2015 -0400
@@ -1,18 +1,22 @@
-<tool id="signalp3" name="SignalP 3.0" version="0.0.14">
+<tool id="signalp3" name="SignalP 3.0" version="0.0.15">
     <description>Find signal peptides in protein sequences</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <!-- Using 2000 chunks meaning 4 threads doing 500 each is ideal -->
     <parallelism method="basic" split_inputs="fasta_file" split_mode="to_size" split_size="2000" merge_outputs="tabular_file"></parallelism>
+    <requirements>
+        <requirement type="binary">signalp</requirement>
+        <requirement type="package">signalp</requirement>
+    </requirements>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
     <command interpreter="python">
       signalp3.py $organism $truncate "\$GALAXY_SLOTS" $fasta_file $tabular_file
       ##If the environment variable isn't set, get "", and the python wrapper
       ##defaults to four threads.
     </command>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
     <inputs>
         <param name="fasta_file" type="data" format="fasta" label="FASTA file of protein sequences"/> 
         <param name="organism" type="select" display="radio" label="Organism">
@@ -27,9 +31,6 @@
     <outputs>
         <data name="tabular_file" format="tabular" label="SignalP $organism results" />
     </outputs>
-    <requirements>
-        <requirement type="binary">signalp</requirement>
-    </requirements>
     <tests>
         <test>
             <param name="fasta_file" value="four_human_proteins.fasta" ftype="fasta"/>
--- a/tools/protein_analysis/suite_config.xml	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/suite_config.xml	Tue Sep 01 09:56:36 2015 -0400
@@ -1,21 +1,21 @@
-    <suite id="tmhmm_and_signalp" name="Protein/gene sequence analysis tools" version="0.2.6">
+    <suite id="tmhmm_and_signalp" name="Protein/gene sequence analysis tools" version="0.2.8">
         <description>TMHMM, SignalP, RXLR motifs, WoLF PSORT</description>
-        <tool id="tmhmm2" name="TMHMM 2.0" version="0.0.12">
+        <tool id="tmhmm2" name="TMHMM 2.0" version="0.0.14">
             <description>Find transmembrane domains in protein sequences</description>
         </tool>
-        <tool id="signalp3" name="SignalP 3.0" version="0.0.13">
+        <tool id="signalp3" name="SignalP 3.0" version="0.0.15">
             <description>Find signal peptides in protein sequences</description>
         </tool>
-        <tool id="promoter2" name="Promoter 2.0" version="0.0.7">
+        <tool id="promoter2" name="Promoter 2.0" version="0.0.9">
             <description>Find eukaryotic PolII promoters in DNA sequences</description>
         </tool>
-        <tool id="psortb" name="PSORTb" version="0.0.4">
+        <tool id="psortb" name="PSORTb" version="0.0.6">
             <description>Bacteria/archaea protein subcellular localization prediction</description>
         </tool>
-        <tool id="wolf_psort" name="WoLF PSORT" version="0.0.7">
+        <tool id="wolf_psort" name="WoLF PSORT" version="0.0.9">
             <description>Eukaryote protein subcellular localization prediction</description>
         </tool>
-        <tool id="rxlr_motifs" name="RXLR Motifs" version="0.0.8">
+        <tool id="rxlr_motifs" name="RXLR Motifs" version="0.0.11">
             <description>Find RXLR Effectors of Plant Pathogenic Oomycetes</description>
         </tool>
     </suite>
--- a/tools/protein_analysis/tmhmm2.py	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/tmhmm2.py	Tue Sep 01 09:56:36 2015 -0400
@@ -43,12 +43,12 @@
 import sys
 import os
 import tempfile
-from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count
+from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count
 
 FASTA_CHUNK = 500
 
 if len(sys.argv) != 4:
-    stop_err("Require three arguments, number of threads (int), input protein FASTA file & output tabular file")
+    sys_exit("Require three arguments, number of threads (int), input protein FASTA file & output tabular file")
 
 num_threads = thread_count(sys.argv[1], default=4)
 fasta_file = sys.argv[2]
@@ -68,7 +68,7 @@
             identifier, length, expAA, first60, predhel, topology = parts
         except:
             assert len(parts)!=6
-            stop_err("Bad line: %r" % line)
+            sys_exit("Bad line: %r" % line)
         assert length.startswith("len="), line
         length = length[4:]
         assert expAA.startswith("ExpAA="), line
@@ -112,7 +112,7 @@
         except IOError:
             output = ""
         clean_up(fasta_files + temp_files)
-        stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
+        sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
                  error_level)
 del results
 del jobs
@@ -125,7 +125,7 @@
     data_handle.close()
     if not count:
         clean_up(fasta_files + temp_files)
-        stop_err("No output from tmhmm2")
+        sys_exit("No output from tmhmm2")
 out_handle.close()
 
 clean_up(fasta_files + temp_files)
--- a/tools/protein_analysis/tmhmm2.xml	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/tmhmm2.xml	Tue Sep 01 09:56:36 2015 -0400
@@ -1,18 +1,22 @@
-<tool id="tmhmm2" name="TMHMM 2.0" version="0.0.13">
+<tool id="tmhmm2" name="TMHMM 2.0" version="0.0.14">
     <description>Find transmembrane domains in protein sequences</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <!-- Using 2000 chunks meaning 4 threads doing 500 each is ideal -->
     <parallelism method="basic" split_inputs="fasta_file" split_mode="to_size" split_size="2000" merge_outputs="tabular_file"></parallelism>
+    <requirements>
+        <requirement type="binary">tmhmm</requirement>
+        <requirement type="package">tmhmm</requirement>
+    </requirements>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
     <command interpreter="python">
       tmhmm2.py "\$GALAXY_SLOTS" $fasta_file $tabular_file
       ##If the environment variable isn't set, get "", and the python wrapper
       ##defaults to four threads.
     </command>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
     <inputs>
         <param name="fasta_file" type="data" format="fasta" label="FASTA file of protein sequences"/> 
         <!--
@@ -25,9 +29,6 @@
     <outputs>
         <data name="tabular_file" format="tabular" label="TMHMM results" />
     </outputs>
-    <requirements>
-        <requirement type="binary">tmhmm</requirement>
-    </requirements>
     <tests>
         <test>
             <param name="fasta_file" value="four_human_proteins.fasta" ftype="fasta"/>
--- a/tools/protein_analysis/wolf_psort.py	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/wolf_psort.py	Tue Sep 01 09:56:36 2015 -0400
@@ -35,7 +35,7 @@
 """
 import sys
 import os
-from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count
+from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count
 
 FASTA_CHUNK = 500
 exe = "runWolfPsortSummary"
@@ -59,11 +59,11 @@
 """
 
 if len(sys.argv) != 5:
-    stop_err("Require four arguments, organism, threads, input protein FASTA file & output tabular file")
+    sys_exit("Require four arguments, organism, threads, input protein FASTA file & output tabular file")
 
 organism = sys.argv[1]
 if organism not in ["animal", "plant", "fungi"]:
-    stop_err("Organism argument %s is not one of animal, plant, fungi" % organism)
+    sys_exit("Organism argument %s is not one of animal, plant, fungi" % organism)
 
 num_threads = thread_count(sys.argv[2], default=4)
 fasta_file = sys.argv[3]
@@ -106,7 +106,7 @@
     if error_level or output.lower().startswith("error running"):
         clean_up(fasta_files)
         clean_up(temp_files)
-        stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
+        sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
                  error_level)
 del results
 
--- a/tools/protein_analysis/wolf_psort.xml	Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/wolf_psort.xml	Tue Sep 01 09:56:36 2015 -0400
@@ -1,15 +1,19 @@
-<tool id="wolf_psort" name="WoLF PSORT" version="0.0.8">
+<tool id="wolf_psort" name="WoLF PSORT" version="0.0.9">
     <description>Eukaryote protein subcellular localization prediction</description>
+    <requirements>
+        <requirement type="binary">runWolfPsortSummary</requirement>
+        <requirement type="binary">psort</requirement>
+    </requirements>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
     <command interpreter="python">
       wolf_psort.py $organism "\$GALAXY_SLOTS" "$fasta_file" "$tabular_file"
       ##If the environment variable isn't set, get "", and python wrapper
       ##defaults to four threads.
     </command>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
     <inputs>
         <param name="fasta_file" type="data" format="fasta" label="FASTA file of protein sequences"/> 
         <param name="organism" type="select" display="radio" label="Organism">
@@ -21,9 +25,6 @@
     <outputs>
         <data name="tabular_file" format="tabular" label="WoLF PSORT $organism results" />
     </outputs>
-    <requirements>
-        <requirement type="binary">runWolfPsortSummary</requirement>
-    </requirements>
     <tests>
         <test>
             <param name="fasta_file" value="four_human_proteins.fasta" ftype="fasta"/>