view proteinortho_grab_proteins.xml @ 10:36dffb2bc194 draft default tip

planemo upload for repository https://gitlab.com/paulklemm_PHD/proteinortho commit e151cf96893602bf011c27a2d91df1ef594b774d
author iuc
date Fri, 13 Dec 2024 10:19:18 +0000
parents b183a90fe278
children
line wrap: on
line source

<tool id="proteinortho_grab_proteins" name="Proteinortho grab proteins" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" profile="@PROFILE@">
    <!-- One-line summary of what the tool does. -->
    <description>finds genes/proteins in a given fasta file</description>
    <!-- Shared definitions are imported from proteinortho_macros.xml. The @TOOL_VERSION@,
         @WRAPPER_VERSION@ and @PROFILE@ tokens used on the tool tag and the three macros
         expanded below are not defined in this file, so they presumably come from that
         import (confirm there). -->
    <macros>
        <import>proteinortho_macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Proteinortho uses the names of the files it is given (output contains
        ## filenames => species names), so every input dataset is symlinked into the
        ## working directory under its sanitized element identifier instead of being
        ## passed by its dataset path.
        ## NOTE(review): inputs whose sanitized names coincide, or an input named
        ## 'query' or 'output', still clash with each other / with the paths used below.
        #import re
        #set $fasta_links = []
        #for $f in $input_files
            ## every character that is not a word character, '-', '_' or '.' becomes '_'
            #set $link_name = re.sub('[^\w\-_.]', '_', $f.element_identifier)
            ## '--' ends option parsing so a name starting with '-' is not read as an ln flag
            ln -sf -- '$f' '$link_name' &&
            ## remember the link name so the final command line reuses exactly the same value
            #silent $fasta_links.append($link_name)
        #end for
        #if $query.querytype == "file"
            ln -sf '$query.queryfile' 'query' &&
        #end if
        ## run inside output/ so that the files written by the script (tofiles mode) are
        ## the only content of the directory scanned by discover_datasets
        mkdir output && cd output &&
        proteinortho_grab_proteins.pl
            --tofiles
            #if $regex
                '$regex'
            #end if
            $source
            ## the query (literal string or linked orthology-groups file) precedes the fasta files
            #if $query.querytype == "string"
                '$query.querystring'
            #else
                ../query
            #end if
            #for $link_name in $fasta_links
                '../${link_name}'
            #end for
    ]]></command>
    <inputs>
        <!-- One or more fasta datasets that are searched for the query. -->
        <param name="input_files" type="data" format="fasta" multiple="true" min="1" label="Select the input fasta files"/>
        <!-- The query is either a literal identifier/pattern or an orthology-groups table. -->
        <conditional name="query">
            <param name="querytype" type="select" label="Query type">
                <option value="string" selected="true">String</option>
                <option value="file">orthology-groups output file</option>
            </param>
            <when value="string">
                <param name="querystring" type="text" label="A string of the protein/gene name/identifier that you want to search">
                    <!-- An empty value would put an empty query argument on the command line. -->
                    <validator type="empty_field" message="Please provide a protein/gene identifier to search for"/>
                    <validator type="regex" negate="true" message="Identifier must not end with a backslash">.*\\$</validator>
                    <!-- Allow-list: characters that are not listed below are removed (invalid_char is empty).
                         The single quote is not listed, so the value cannot terminate the single-quoted
                         shell argument it is placed in by the command template. -->
                    <sanitizer invalid_char="">
                        <valid initial="string.letters,string.digits">
                            <add value="!"/>
                            <add value="="/>
                            <add value="-"/>
                            <add value="."/>
                            <add value="*"/>
                            <add value="?"/>
                            <add value="+"/>
                            <add value="\\"/>
                            <add value="_"/>
                            <add value="|"/>
                            <!-- XML has no backslash escapes, so the tab is given as a character reference. -->
                            <add value="&#9;"/> <!-- tab -->
                            <add value=","/>
                            <add value=";"/>
                            <add value="&#91;"/> <!-- left square bracket, e.g subselecting from vec[1] -->
                            <add value="&#93;"/> <!-- right square bracket -->
                            <add value="&#40;"/> <!-- left parenthesis -->
                            <add value="&#41;"/> <!-- right parenthesis -->
                        </valid>
                    </sanitizer>
                </param>
            </when>
            <when value="file">
                <param name="queryfile" type="data" format="tabular" label="A orthology-groups file" help="For each group a fasta file is generated containing all proteins/genes of that group."/>
            </when>
        </conditional>
        <!-- Flag parameters: each renders its truevalue on the command line when checked and nothing otherwise. -->
        <param argument="--regex" type="boolean" checked="false" truevalue="-E" falsevalue="" label="Enable regular expressions (perl)" help="If not: the string is escaped (e.g. | -> \|) [-E]"/>
        <param argument="--source" type="boolean" checked="false" truevalue="-source" falsevalue="" label="Add the filename to the gene/protein-name [--source]"/>
    </inputs>
    <outputs>
        <!-- Single list collection; its elements are discovered after the job from the files
             found in the output/ directory, each registered as a hidden fasta dataset and
             identified by the pattern __designation__. -->
        <collection name="listproteinorthograbproteins"
                    type="list"
                    label="${tool.name} on ${on_string}: list of fasta">
            <discover_datasets directory="output"
                               pattern="__designation__"
                               format="fasta"
                               visible="false"/>
        </collection>
    </outputs>
    <tests>
        <!-- Literal string query with default flags: one fasta file is expected. -->
        <test>
            <param name="input_files" value="L.fasta,C.fasta,C2.fasta,E.fasta,M.fasta"/>
            <conditional name="query">
                <param name="querytype" value="string"/>
                <param name="querystring" value="E_1"/>
            </conditional>
            <output_collection name="listproteinorthograbproteins" type="list" count="1"/>
        </test>
        <!-- Regular-expression query with both boolean flags switched on. -->
        <test>
            <param name="input_files" value="L.fasta,C.fasta,C2.fasta,E.fasta,M.fasta"/>
            <conditional name="query">
                <param name="querytype" value="string"/>
                <param name="querystring" value="M..2"/>
            </conditional>
            <param name="regex" value="true"/>
            <param name="source" value="true"/>
            <output_collection name="listproteinorthograbproteins" type="list" count="1"/>
        </test>
        <!-- Orthology-groups file as query: 34 fasta files are expected for this test file. -->
        <test>
            <param name="input_files" value="L.fasta,C.fasta,C2.fasta,E.fasta,M.fasta"/>
            <conditional name="query">
                <param name="querytype" value="file"/>
                <param name="queryfile" value="result.proteinortho.tsv"/>
            </conditional>
            <output_collection name="listproteinorthograbproteins" type="list" count="34"/>
        </test>
    </tests>
    <help><![CDATA[proteinortho grab proteins

**What it does**

proteinortho_grab_proteins : find gene(s)/protein(s) in the given fasta file(s) and retrieve their sequence(s). You can also use an orthology-groups file as the query; in that case a fasta file containing all sequences of the group is written for each group (one line of the file). This can result in many files!

**Other Proteinortho-Tools for downstream analysis**

* `proteinortho summary` : Summarizes the orthology-pairs/RBH files to determine how the species are connected to each other.

More information can be found on GitLab: https://gitlab.com/paulklemm_PHD/proteinortho
]]>
    </help>
    <!-- Publications associated with this tool, each referenced by its DOI. -->
    <citations>
        <citation type="doi">10.3389/fbinf.2023.1322477</citation>
        <citation type="doi">10.1186/1471-2105-12-124</citation>
        <citation type="doi">10.1371/journal.pone.0105015</citation>
    </citations>
</tool>