diff proteinortho_grab_proteins.xml @ 0:d348c3a151d9 draft

"planemo upload for repository https://gitlab.com/paulklemm_PHD/proteinortho commit 889335c0a31f156c3f90d4c2048cb4df155a53b2"
author iuc
date Tue, 18 Feb 2020 17:57:53 -0500
parents
children fa2d1e652ec1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/proteinortho_grab_proteins.xml	Tue Feb 18 17:57:53 2020 -0500
@@ -0,0 +1,112 @@
+<tool id="proteinortho_grab_proteins" name="Proteinortho grab proteins" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@">
+    <description>finds genes/proteins in a given fasta file</description>
+    <macros> <!-- shared definitions; presumably also the source of the @TOOL_VERSION@ and @WRAPPER_VERSION@ tokens used in the tool tag (TODO confirm in the macros file) -->
+        <import>proteinortho_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/> <!-- expanded from the imported macros; the definition is not visible in this file -->
+    <expand macro="version_command"/> <!-- expanded from the imported macros; the definition is not visible in this file -->
+    <command detect_errors="exit_code"><![CDATA[
+        ## proteinortho writes the file names into its output (file name => species name), so each input is linked under a sanitized copy of its element identifier
+        #import re
+        #for $fasta in $input_files
+            ln -sf '$fasta' '${re.sub('[^\w\-_.]', '_', $fasta.element_identifier)}' &&
+        #end for
+        #if $query.querytype == "file"
+            ln -sf '$query.queryfile' 'query' &&
+        #end if
+        mkdir output && cd output &&
+        proteinortho_grab_proteins.pl
+            --tofiles
+            #if $regex
+                $regex
+            #end if
+            #if $source
+                $source
+            #end if
+            #if $query.querytype == "file"
+                ../query
+            #else
+                '$query.querystring'
+            #end if
+            #for $fasta in $input_files
+                ../${re.sub('[^\w\-_.]', '_', $fasta.element_identifier)}
+            #end for
+    ]]></command>
+    <inputs>
+        <param name="input_files" type="data" format="fasta" multiple="true" min="1" label="Select the input fasta files"/>
+        <conditional name="query"> <!-- the query is either a literal string/pattern or an orthology-groups file -->
+            <param name="querytype" type="select" label="Query type">
+                <option value="string" selected="true">String</option>
+                <option value="file">orthology-groups output file</option>
+            </param>
+            <when value="string">
+                <param name="querystring" type="text" label="A string of the protein/gene name/identifier that you want to search">
+                    <sanitizer invalid_char=""> <!-- characters outside the whitelist below are replaced by the empty string, i.e. removed -->
+                        <valid initial="string.ascii_letters,string.digits"> <!-- string.letters does not exist in Python 3; ascii_letters is valid in Python 2 and 3 -->
+                            <add value="!"/>
+                            <add value="="/>
+                            <add value="-"/>
+                            <add value="."/>
+                            <add value="*"/>
+                            <add value="?"/>
+                            <add value="+"/>
+                            <add value="\\"/>
+                            <add value="_"/>
+                            <add value="|"/>
+                            <add value="&#91;"/> <!-- left square bracket, e.g subselecting from vec[1] -->
+                            <add value="&#93;"/> <!-- right square bracket -->
+                            <add value="&#40;"/> <!-- left parenthesis -->
+                            <add value="&#41;"/> <!-- right parenthesis -->
+                        </valid>
+                    </sanitizer>
+                </param>
+            </when>
+            <when value="file">
+                <param name="queryfile" type="data" format="tabular" label="A orthology-groups file" help="For each group a fasta file is generated containing all proteins/genes of that group."/>
+            </when>
+        </conditional>
+        <param argument="--regex" type="boolean" checked="false" truevalue="-E" falsevalue="" label="Enable regular expressions (perl)" help="If not: the string is escaped (e.g. | -> \|) [-E]"/>
+        <param argument="--source" type="boolean" checked="false" truevalue="-source" falsevalue="" label="Add the filename to the gene/protein-name [--source]"/>
+    </inputs>
+    <outputs> <!-- files found in the "output" directory (created by the command) are collected as fasta elements of one list collection -->
+        <collection type="list" name="listproteinorthograbproteins" label="${tool.name} on ${on_string}: list of fasta">
+            <discover_datasets directory="output" pattern="__designation__" format="fasta" visible="false"/>
+        </collection>
+    </outputs>
+    <tests>
+        <test> <!-- literal string query; the collection must hold exactly one element -->
+            <param name="querytype" value="string"/>
+            <param name="querystring" value="E_1"/>
+            <param name="input_files" value="L.fasta,C.fasta,C2.fasta,E.fasta,M.fasta"/>
+            <output_collection name="listproteinorthograbproteins" count="1"/>
+        </test>
+        <test> <!-- string query with both boolean flags (regex, source) switched on; one element expected -->
+            <param name="querytype" value="string"/>
+            <param name="querystring" value="M..2"/>
+            <param name="regex" value="true"/>
+            <param name="source" value="true"/>
+            <param name="input_files" value="L.fasta,C.fasta,C2.fasta,E.fasta,M.fasta"/>
+            <output_collection name="listproteinorthograbproteins" count="1"/>
+        </test>
+        <test> <!-- query taken from an orthology-groups file; 34 elements expected -->
+            <param name="querytype" value="file"/>
+            <param name="queryfile" value="result.proteinortho.tsv"/>
+            <param name="input_files" value="L.fasta,C.fasta,C2.fasta,E.fasta,M.fasta"/>
+            <output_collection name="listproteinorthograbproteins" count="34"/>
+        </test>
+    </tests>
+    <help><![CDATA[proteinortho grab proteins
+
+**What it does**
+
+proteinortho_grab_proteins : find gene(s)/protein(s) in a given fasta file and retrieve their sequence(s). You can also use an orthology-groups file; in that case all sequences of each group (one line of the file) are written to a separate fasta file. This can result in many files!
+
+**Other Proteinortho-Tools for downstream analysis**
+
+* `proteinortho summary` : Summarizes the orthology-pairs/RBH files to determine how the species are connected to each other.
+
+More information can be found on GitLab: https://gitlab.com/paulklemm_PHD/proteinortho
+]]>
+    </help>
+    <expand macro="citations"/>
+</tool>