diff tools/protein_analysis/signalp3.xml @ 20:a19b3ded8f33 draft

v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
author peterjc
date Thu, 21 Sep 2017 11:35:20 -0400
parents eb6ac44d4b8e
children 238eae32483c
line wrap: on
line diff
--- a/tools/protein_analysis/signalp3.xml	Wed Feb 01 09:46:42 2017 -0500
+++ b/tools/protein_analysis/signalp3.xml	Thu Sep 21 11:35:20 2017 -0400
@@ -1,24 +1,19 @@
-<tool id="signalp3" name="SignalP 3.0" version="0.0.15">
+<tool id="signalp3" name="SignalP 3.0" version="0.0.19">
     <description>Find signal peptides in protein sequences</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <!-- Using chunks of 2000 sequences means 4 threads doing 500 each, which is ideal -->
     <parallelism method="basic" split_inputs="fasta_file" split_mode="to_size" split_size="2000" merge_outputs="tabular_file"></parallelism>
     <requirements>
-        <requirement type="binary">signalp</requirement>
         <requirement type="package">signalp</requirement>
     </requirements>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
-    <command interpreter="python">
-      signalp3.py $organism $truncate "\$GALAXY_SLOTS" $fasta_file $tabular_file
-      ##If the environment variable isn't set, get "", and the python wrapper
-      ##defaults to four threads.
+    <version_command>
+python $__tool_directory__/signalp3.py --version
+    </version_command>
+    <command detect_errors="aggressive">
+python $__tool_directory__/signalp3.py $organism $truncate "\$GALAXY_SLOTS" '$fasta_file' '$tabular_file'
     </command>
     <inputs>
-        <param name="fasta_file" type="data" format="fasta" label="FASTA file of protein sequences"/> 
+        <param name="fasta_file" type="data" format="fasta" label="FASTA file of protein sequences"/>
         <param name="organism" type="select" display="radio" label="Organism">
             <option value="euk">Eukaryote</option>
             <option value="gram+">Gram positive</option>
@@ -35,36 +30,36 @@
         <test>
             <param name="fasta_file" value="four_human_proteins.fasta" ftype="fasta"/>
             <param name="organism" value="euk"/>
-            <param name="truncate" value="0"/> 
+            <param name="truncate" value="0"/>
             <output name="tabular_file" file="four_human_proteins.signalp3.tabular" ftype="tabular"/>
         </test>
         <test>
             <param name="fasta_file" value="empty.fasta" ftype="fasta"/>
             <param name="organism" value="euk"/>
-            <param name="truncate" value="60"/> 
+            <param name="truncate" value="60"/>
             <output name="tabular_file" file="empty_signalp3.tabular" ftype="tabular"/>
         </test>
         <test>
             <param name="fasta_file" value="empty.fasta" ftype="fasta"/>
             <param name="organism" value="gram+"/>
-            <param name="truncate" value="80"/> 
+            <param name="truncate" value="80"/>
             <output name="tabular_file" file="empty_signalp3.tabular" ftype="tabular"/>
         </test>
         <test>
             <param name="fasta_file" value="empty.fasta" ftype="fasta"/>
             <param name="organism" value="gram-"/>
-            <param name="truncate" value="0"/> 
+            <param name="truncate" value="0"/>
             <output name="tabular_file" file="empty_signalp3.tabular" ftype="tabular"/>
         </test>
         <test>
             <param name="fasta_file" value="rxlr_win_et_al_2007.fasta" ftype="fasta"/>
             <param name="organism" value="euk"/>
-            <param name="truncate" value="70"/> 
+            <param name="truncate" value="70"/>
             <output name="tabular_file" file="rxlr_win_et_al_2007_sp3.tabular" ftype="tabular"/>
         </test>
     </tests>
     <help>
-    
+
 **What it does**
 
 This calls the SignalP v3.0 tool for prediction of signal peptides, which uses both a Neural Network (NN) and Hidden Markov Model (HMM) to produce two sets of scores.
@@ -83,12 +78,12 @@
 
 **Neural Network Scores**
 
-For each organism class (Eukaryote, Gram-negative and Gram-positive), two different neural networks are used, one for predicting the actual signal peptide and one for predicting the position of the signal peptidase I (SPase I) cleavage site. 
+For each organism class (Eukaryote, Gram-negative and Gram-positive), two different neural networks are used, one for predicting the actual signal peptide and one for predicting the position of the signal peptidase I (SPase I) cleavage site.
 
 The NN output comprises three different scores (C-max, S-max and Y-max) and two scores derived from them (S-mean and D-score).
 
 ====== ======= ===============================================================
-Column Name    Description 
+Column Name    Description
 ------ ------- ---------------------------------------------------------------
    2-4 C-score The C-score is the 'cleavage site' score. For each position in
                the submitted sequence, a C-score is reported, which should
@@ -141,15 +136,15 @@
 
 The raw 'short' output from SignalP v3.0 looks something like this (21 columns space separated - shown here formatted nicely). Notice that the identifiers are given twice, the first time truncated (as part of the NN predictions) and the second time in full (in the HMM predictions).
 
-====================  ===== === =  ===== === =  ===== === =  ===== =  ===== =   ===================================  =  ===== === =  ===== =
-# SignalP-NN euk predictions                                   	                # SignalP-HMM euk predictions
------------------------------------------------------------------------------   ------------------------------------------------------------
-# name                Cmax  pos ?  Ymax  pos ?  Smax  pos ?  Smean ?  D     ? 	# name                               !  Cmax  pos ?  Sprob ?
-gi|2781234|pdb|1JLY|  0.061  17 N  0.043  17 N  0.199   1 N  0.067 N  0.055 N	gi|2781234|pdb|1JLY|B                Q  0.000  17 N  0.000 N  
-gi|4959044|gb|AAD342  0.099 191 N  0.012  38 N  0.023  12 N  0.014 N  0.013 N	gi|4959044|gb|AAD34209.1|AF069992_1  Q  0.000   0 N  0.000 N  
-gi|671626|emb|CAA856  0.139 381 N  0.020   8 N  0.121   4 N  0.067 N  0.044 N	gi|671626|emb|CAA85685.1|            Q  0.000   0 N  0.000 N  
-gi|3298468|dbj|BAA31  0.208  24 N  0.184  38 N  0.980  32 Y  0.613 Y  0.398 N	gi|3298468|dbj|BAA31520.1|           Q  0.066  24 N  0.139 N
-====================  ===== === =  ===== === =  ===== === =  ===== =  ===== =   ===================================  =  ===== === =  ===== =
+====================  ===== === =  ===== === =  ===== === =  ===== =  ===== = ===================================  =  ===== === =  ===== =
+# SignalP-NN euk predictions                                                  # SignalP-HMM euk predictions
+----------------------------------------------------------------------------- ------------------------------------------------------------
+# name                Cmax  pos ?  Ymax  pos ?  Smax  pos ?  Smean ?  D     ? # name                               !  Cmax  pos ?  Sprob ?
+gi|2781234|pdb|1JLY|  0.061  17 N  0.043  17 N  0.199   1 N  0.067 N  0.055 N gi|2781234|pdb|1JLY|B                Q  0.000  17 N  0.000 N
+gi|4959044|gb|AAD342  0.099 191 N  0.012  38 N  0.023  12 N  0.014 N  0.013 N gi|4959044|gb|AAD34209.1|AF069992_1  Q  0.000   0 N  0.000 N
+gi|671626|emb|CAA856  0.139 381 N  0.020   8 N  0.121   4 N  0.067 N  0.044 N gi|671626|emb|CAA85685.1|            Q  0.000   0 N  0.000 N
+gi|3298468|dbj|BAA31  0.208  24 N  0.184  38 N  0.980  32 Y  0.613 Y  0.398 N gi|3298468|dbj|BAA31520.1|           Q  0.066  24 N  0.139 N
+====================  ===== === =  ===== === =  ===== === =  ===== =  ===== = ===================================  =  ===== === =  ===== =
 
 In order to make this easier to use in Galaxy, the wrapper script simplifies this to remove the redundant column and use tabs for separation. It also includes a header line with unique column names.