changeset 3:863ab6ebcafc draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/prot-scriber commit bac22f562727babce8e0f456c82408c3063a683d
author iuc
date Sat, 18 May 2024 20:36:38 +0000
parents 4d4df9779b7b
children
files prot-scriber.xml test-data/8_Proteins_prot-scriber.out
diffstat 2 files changed, 121 insertions(+), 124 deletions(-) [+]
line wrap: on
line diff
--- a/prot-scriber.xml	Tue Sep 20 09:45:46 2022 +0000
+++ b/prot-scriber.xml	Sat May 18 20:36:38 2024 +0000
@@ -1,15 +1,15 @@
 <tool id="prot_scriber" name="prot-scriber" version="@TOOL_VERSION@" profile="21.05">
-  <description>Protein annotation of short human readable descriptions</description>
-  <macros>
-    <token name="@TOOL_VERSION@">0.1.4</token>
-  </macros>
-  <requirements>
-    <requirement type="package" version="@TOOL_VERSION@">prot-scriber</requirement>
-  </requirements>
-  <stdio>
-    <regex match="panicked" level="fatal" source="stderr" />
-  </stdio>
-  <command>
+    <description>Protein annotation with short human-readable descriptions</description>
+    <macros>
+        <token name="@TOOL_VERSION@">0.1.5</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">prot-scriber</requirement>
+    </requirements>
+    <stdio>
+        <regex match="panicked" level="fatal" source="stderr"/>
+    </stdio>
+    <command>
     <![CDATA['prot-scriber'
     #if str($input_config.input_config_selector) == "basic"
       #for $sst in $input_config.seq_sim_table
@@ -75,117 +75,103 @@
     -o '$output'
     ]]>
   </command>
-  <inputs>
-    <conditional name="input_config">
-      <param type="select" name="input_config_selector" label="Choose input configuration options">
-        <option value="basic" selected="true">Basic</option>
-        <option value="advanced">Advanced</option>
-      </param>
-      <when value="basic">
-        <param type="data" multiple="true" name="seq_sim_table" argument="-s" format="tabular" label="Sequence similarity search results in tabular format (-s)" help="Files in which to find sequence similarity search results in tabular format (SSST). Use e.g. Blast or Diamond to produce them.
-        Required columns are: 'qacc sacc stitle' (Blast) or 'qseqid sseqid stitle' (Diamond)." /> 
-      </when>
-      <when value="advanced">
-        <repeat name="advanced_input_repeat" title="Sequence similarity table" min="1" default="1">
-          <param type="data" name="seq_sim_table" argument="-s" format="tabular" label="Sequence similarity search result in tabular format (-s)" help="File in which to find sequence similarity search results in tabular format (SSST). Use e.g. Blast or Diamond to produce them.
-          Required columns are: 'qacc sacc stitle' (Blast) or 'qseqid sseqid stitle' (Diamond)." /> 
-          <param type="text" optional="true" name="field_separator" argument="-p" label="Field separator (-p)" help="Field-Separator of the (-s) sequence similarity table. The default value is the 'TAB' character. Set to 'default' to use the hard coded default">
-            <sanitizer>
-              <valid initial="default">
-                <add preset="string.printable" />
-              </valid>
-            </sanitizer>
-          </param>
-          <param type="text" optional="true" name="header" argument="-e" label="Header of the sequence similarity tables (-e)" help="Header of the (-s) sequence similarity table. Separated by space (' ') the names of the
-            in order of appearance in the respective table. Required and default columns are 'qacc sacc stitle'. Set to 'default' to use the hard coded default" />
-          <param type="data" optional="true" name="blacklist_regexs" argument="-b" format="tabular" label="Blacklist Regexs (-b)" help="A file with regular expressions, one per line. Any match to any of these
-            regular expressions causes sequence similarity search result descriptions ('stitle' in Blast terminology) to be discarded from the prot-scriber annotation process. Set to 'default' to use the hard coded default" />
-          <param type="data" optional="true" name="capture_replace_pairs" argument="-c" format="tabular" label="Capture replace pairs (-c)" help="A file with pairs of lines. Within each pair the first line is a regular expressions
-            defining one or more capture groups. The second line of a pair is the string used to replace the match in the regular expression with. Set to 'default' to use the hard coded default" />
-          <param type="data" optional="true" name="filter_regexs" argument="-l" format="tabular" label="Filter regexs (-l)" help="A file with regular expressions, one per line. Any match to any of these
-            regular expressions causes the matched sub-string to be deleted, i.e. filtered out. Set to 'default' to use the hard coded default" /> 
-        </repeat>
-        <section title="Expert options" name="expert_options">
-          <param type="data" optional="true" name="non_informative_words_regexs" argument="-w" format="tabular" label="Non informative words regexs (-w)" help="A file in which regular expressions (regexs) are stored, one per line. These
-            regexs are used to recognize non-informative words, which will only receive a minimum score in the prot-scriber process that generates human readable description." />
-          <param type="text" optional="true" name="description_split_regex" argument="-r" label="Description split regex (-r)" help="A regular expression to be used to split descriptions (`stitle` in Blast
-            terminology) into words. Default is '([~_\-/|\;,':.\s]+)'.">
-            <sanitizer>
-              <valid initial="default">
-                <add preset="string.printable" />
-              </valid>
-            </sanitizer>
-          </param>
-          <param type="integer" optional="true" name="center_inverse_word_information_content_at_quantile" argument="-q" label="Center inverse word-information-content at quantile (-q)" help="The quantile (percentile) to be subtracted from calculated inverse word information
-            content to center these values. Value between 0 and 1." />
-          <param type="data" optional="true" name="polish_capture_replace_pairs" argument="-d" label="Polishing capture replace pairs (-d)" help="A file with pairs of lines. Defines pairs of regex / replace
-            pairs for post polishing of annotation results. Set to 'none' or provide an empty file to supress polishing."/>
+    <inputs>
+        <conditional name="input_config">
+            <param name="input_config_selector" type="select" label="Choose input configuration options">
+                <option value="basic" selected="true">Basic</option>
+                <option value="advanced">Advanced</option>
+            </param>
+            <when value="basic">
+                <param name="seq_sim_table" argument="-s" type="data" format="tabular" label="Sequence similarity search results in tabular format (-s)" help="Files in which to find sequence similarity search results in tabular format (SSST). Use e.g. Blast or Diamond to produce them.         Required columns are: 'qacc sacc stitle' (Blast) or 'qseqid sseqid stitle' (Diamond)." multiple="true"/>
+            </when>
+            <when value="advanced">
+                <repeat name="advanced_input_repeat" title="Sequence similarity table" min="1" default="1">
+                    <param name="seq_sim_table" argument="-s" type="data" format="tabular" label="Sequence similarity search result in tabular format (-s)" help="File in which to find sequence similarity search results in tabular format (SSST). Use e.g. Blast or Diamond to produce them.           Required columns are: 'qacc sacc stitle' (Blast) or 'qseqid sseqid stitle' (Diamond)."/>
+                    <param name="field_separator" argument="-p" type="text" optional="true" label="Field separator (-p)" help="Field-Separator of the (-s) sequence similarity table. The default value is the 'TAB' character. Set to 'default' to use the hard coded default">
+                        <sanitizer>
+                            <valid initial="default">
+                                <add preset="string.printable"/>
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param name="header" argument="-e" type="text" optional="true" label="Header of the sequence similarity tables (-e)" help="Header of the (-s) sequence similarity table: the names of the columns, separated by space (' '), in order of appearance in the respective table. Required and default columns are 'qacc sacc stitle'. Set to 'default' to use the hard coded default"/>
+                    <param name="blacklist_regexs" argument="-b" type="data" format="tabular" optional="true" label="Blacklist Regexs (-b)" help="A file with regular expressions, one per line. Any match to any of these             regular expressions causes sequence similarity search result descriptions ('stitle' in Blast terminology) to be discarded from the prot-scriber annotation process. Set to 'default' to use the hard coded default"/>
+                    <param name="capture_replace_pairs" argument="-c" type="data" format="tabular" optional="true" label="Capture replace pairs (-c)" help="A file with pairs of lines. Within each pair the first line is a regular expression defining one or more capture groups. The second line of a pair is the string used to replace the match in the regular expression with. Set to 'default' to use the hard coded default"/>
+                    <param name="filter_regexs" argument="-l" type="data" format="tabular" optional="true" label="Filter regexs (-l)" help="A file with regular expressions, one per line. Any match to any of these             regular expressions causes the matched sub-string to be deleted, i.e. filtered out. Set to 'default' to use the hard coded default"/>
+                </repeat>
+                <section title="Expert options" name="expert_options">
+                    <param name="non_informative_words_regexs" argument="-w" type="data" format="tabular" optional="true" label="Non informative words regexs (-w)" help="A file in which regular expressions (regexs) are stored, one per line. These             regexs are used to recognize non-informative words, which will only receive a minimum score in the prot-scriber process that generates human readable description."/>
+                    <param name="description_split_regex" argument="-r" type="text" optional="true" label="Description split regex (-r)" help="A regular expression to be used to split descriptions (`stitle` in Blast             terminology) into words. Default is '([~_\-/|\;,':.\s]+)'.">
+                        <sanitizer>
+                            <valid initial="default">
+                                <add preset="string.printable"/>
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param name="center_inverse_word_information_content_at_quantile" argument="-q" type="integer" optional="true" label="Center inverse word-information-content at quantile (-q)" help="The quantile (percentile) to be subtracted from calculated inverse word information             content to center these values. Value between 0 and 1."/>
+                    <param name="polish_capture_replace_pairs" argument="-d" type="data" format="txt" optional="true" label="Polishing capture replace pairs (-d)" help="A file with pairs of lines. Defines regex / replace pairs for post polishing of annotation results. Set to 'none' or provide an empty file to suppress polishing."/>
+                </section>
+            </when>
+        </conditional>
+        <section title="Sequence family annotation" name="seq_family">
+            <param name="seq_families" argument="-f" type="data" format="tabular" optional="true" label="Families of biological sequences (-f)" help="A file in which families of biological sequences are stored, one family per line. Each         line must have format 'fam_name TAB gene1,gene2,gene3'. Make sure no gene appears in         more than one family."/>
+            <param name="annotate_non_family_queries" argument="-a" type="boolean" optional="true" label="Annotate non family query sequences (-a)" help="Set this to true to also annotate sequences that are not members of a sequence family."/>
+            <param name="seq_family_gene_ids_separator" argument="-g" type="text" optional="true" label="Sequence family file gene-id separator (-g)" help=" A regular expression used to split the list of gene_identifiers in the         argument --seq-families (-f) gene families file. Default is '(\s*,\s*|\s+)'.">
+                <sanitizer>
+                    <valid initial="default">
+                        <add preset="string.printable"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="seq_family_id_genes_separator" argument="-i" type="text" optional="true" label="Sequence family file family - gene-id separator (-i)" help="A string used as separator in the argument --seq-families (-f) gene families file. This         string separates the gene_family_identifier (name) from the gene_identifier list that family comprises. Default is 'TAB'.">
+                <sanitizer>
+                    <valid initial="default">
+                        <add preset="string.printable"/>
+                    </valid>
+                </sanitizer>
+            </param>
         </section>
-      </when>
-    </conditional>
-    <section title="Sequence family annotation" name="seq_family">
-      <param type="data" optional="true" name="seq_families" argument="-f" format="tabular" label="Families of biological sequences (-f)" help="A file in which families of biological sequences are stored, one family per line. Each
-        line must have format 'fam_name TAB gene1,gene2,gene3'. Make sure no gene appears in
-        more than one family." />
-      <param type="boolean" optional="true" name="annotate_non_family_queries" argument="-a" label="Annotate non family query sequences (-a)" help="Set this to true to also annotate sequences are not member of a sequence family." />
-      <param type="text" optional="true" name="seq_family_gene_ids_separator" argument="-g" label="Sequence family file gene-id separator (-g)" help=" A regular expression used to split the list of gene_identifiers in the
-        argument --seq-families (-f) gene families file. Default is '(\s*,\s*|\s+)'.">
-        <sanitizer>
-          <valid initial="default">
-            <add preset="string.printable" />
-          </valid>
-        </sanitizer>
-      </param>
-      <param type="text" optional="true" name="seq_family_id_genes_separator" argument="-i" label="Sequence family file family - gene-id separator (-i)" help="A string used as separator in the argument --seq-families (-f) gene families file. This
-        string separates the gene_family_identifier (name) from the gene_identifier list that family comprises. Default is 'TAB'.">
-        <sanitizer>
-          <valid initial="default">
-            <add preset="string.printable" />
-          </valid>
-        </sanitizer>
-      </param>
-    </section>
-    <param type="boolean" optional="true" name="exclude_not_annotated_queries" argument="-x" label="Exclude not annotated query sequences (-x)" help="Use this option to exclude results from the output table that could not be annotated."/>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="output" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input_config_selector" value="basic"/>
-      <param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt,8_Proteins_vs_Trembl_blastp.txt" />
-      <output name="output" file="8_Proteins_prot-scriber.out" sort="true" />
-    </test>
-    <test>
-      <param name="input_config_selector" value="advanced" />
-      <repeat name="advanced_input_repeat">
-        <param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt" />
-        <param name="field_separator" value="default" />
-        <param name="header" value="qacc sacc stitle" />
-      </repeat>
-      <repeat name="advanced_input_repeat">
-        <param name="seq_sim_table" value="8_Proteins_vs_Trembl_blastp.txt" />
-        <param name="field_separator" value="default" />
-        <param name="header" value="qacc sacc stitle" />
-      </repeat>
-      <output name="output" file="8_Proteins_prot-scriber.out" sort="true" />
-    </test>
-    <test>
-      <param name="input_config_selector" value="advanced" />
-      <repeat name="advanced_input_repeat">
-        <param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt" />
-        <param name="blacklist_regexs" value="blacklist_stitle_regexs.txt" />
-      </repeat>
-      <repeat name="advanced_input_repeat">
-        <param name="seq_sim_table" value="8_Proteins_vs_Trembl_blastp.txt" />
-        <param name="blacklist_regexs" value="blacklist_stitle_regexs.txt" />
-      </repeat>
-      <param name="description_split_regex" value="([~_\-/|;,'\'':.\s]+)" />
-      <param name="center_inverse_word_information_content_at_quantile" value="50" />
-      <output name="output" file="8_Proteins_prot-scriber.out" sort="true" />
-    </test>
-  </tests>
-  <help>
+        <param name="exclude_not_annotated_queries" argument="-x" type="boolean" optional="true" label="Exclude not annotated query sequences (-x)" help="Use this option to exclude results from the output table that could not be annotated."/>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="output"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_config_selector" value="basic"/>
+            <param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt,8_Proteins_vs_Trembl_blastp.txt"/>
+            <output name="output" file="8_Proteins_prot-scriber.out" sort="true"/>
+        </test>
+        <test>
+            <param name="input_config_selector" value="advanced"/>
+            <repeat name="advanced_input_repeat">
+                <param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt"/>
+                <param name="field_separator" value="default"/>
+                <param name="header" value="qacc sacc stitle"/>
+            </repeat>
+            <repeat name="advanced_input_repeat">
+                <param name="seq_sim_table" value="8_Proteins_vs_Trembl_blastp.txt"/>
+                <param name="field_separator" value="default"/>
+                <param name="header" value="qacc sacc stitle"/>
+            </repeat>
+            <output name="output" file="8_Proteins_prot-scriber.out" sort="true"/>
+        </test>
+        <test>
+            <param name="input_config_selector" value="advanced"/>
+            <repeat name="advanced_input_repeat">
+                <param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt"/>
+                <param name="blacklist_regexs" value="blacklist_stitle_regexs.txt"/>
+            </repeat>
+            <repeat name="advanced_input_repeat">
+                <param name="seq_sim_table" value="8_Proteins_vs_Trembl_blastp.txt"/>
+                <param name="blacklist_regexs" value="blacklist_stitle_regexs.txt"/>
+            </repeat>
+            <param name="description_split_regex" value="([~_\-/|;,'\'':.\s]+)"/>
+            <param name="center_inverse_word_information_content_at_quantile" value="50"/>
+            <output name="output" file="8_Proteins_prot-scriber.out" sort="true"/>
+        </test>
+    </tests>
+    <help>
     <![CDATA[
 
 **What it does**
@@ -343,4 +329,15 @@
 
     ]]>
   </help>
+    <citations>
+        <citation type="bibtex">
+            @misc{githubprot-scriber,
+            author = {Asis Hallab},
+            year = {2024},
+            title = {prot-scriber},
+            publisher = {Github},
+            journal = {Github repository},
+            url = {https://github.com/usadellab/prot-scriber},
+        }</citation>
+    </citations>
 </tool>
--- a/test-data/8_Proteins_prot-scriber.out	Tue Sep 20 09:45:46 2022 +0000
+++ b/test-data/8_Proteins_prot-scriber.out	Sat May 18 20:36:38 2024 +0000
@@ -1,12 +1,12 @@
 Annotee-Identifier	Human-Readable-Description
 Soltu.DM.01G022510.1	sucrose nonfermenting protein x2
-Soltu.DM.02G020600.1	arath protein strubbelig receptor family
+Soltu.DM.02G020600.1	protein strubbelig receptor family
 Soltu.DM.10G003150.1	sh and domain containing protein
 Soltu.DM.04G035790.1	phosphatidylinositol phosphatidylcholine transfer protein sfh
 Soltu.DM.02G015700.1	lrr receptor serine threonine protein kinase
 Soltu.DM.03G026010.1	arm repeat protein interacting with
-Soltu.DM.07G016620.1	capch gdsl esterase lipase
+Soltu.DM.07G016620.1	gdsl esterase lipase
 Soltu.DM.S001650.1	germin protein member
 Soltu.DM.03G011280.1	increased dna methylation
-Soltu.DM.01G045390.1	gosmu hva protein
+Soltu.DM.01G045390.1	hva protein
 Soltu.DM.09G022410.3	unknown protein