view mirnature.xml @ 0:4394f98e705e draft default tip

"planemo upload for repository https://github.com/Bierinformatik/miRNAture/tree/galaxy_add/Galaxy/miRNAture commit 47a893683a9445abddce77c28f43d098b62cf385"
author cavelandiah
date Sun, 27 Nov 2022 22:00:05 +0000
parents
children
line wrap: on
line source

<tool id="mirnature" name="miRNAture" version="1.1+galaxy0" python_template_version="3.5" profile="21.05">
    <!-- One-line summary displayed next to the tool name -->
    <description>Computational detection of canonical microRNAs</description>
    <!-- macros.xml is the only macro import; the "requirements" macro expanded
         below is expected to be defined there -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="aggressive"><![CDATA[
        ## Pre-calculated data are published at https://doi.org/10.5281/zenodo.7180160
        ##MIRNATURE_DATA_PATH=\$(dirname \$(which miRNAture))/../share/blockclust_data &&

        ## Select parameters reach the template as wrapper objects, so they are
        ## compared as strings; a comparison with the integer 1 does not match.
        #set $use_blast = str($blast_specific.blast_searches) == '1'
        #set $use_subset = str($activate_subset.subset_models_des) == '1'

        ## NOTE(review): the helper scripts below stage files inside the tool
        ## directory, which is shared by concurrent jobs and may be read-only.
        ## Moving the staging area to the job working directory needs changes in
        ## generate_blast_folder.sh and create_dataset.sh (not visible here).

        mkdir -p output &&

        ## Stage the user-supplied blast queries (parameters nested in a
        ## conditional are referenced through the conditional name).
        #if $use_blast:
            mkdir -p '$__tool_directory__/queries_to_test' &&
            cp '$blast_specific.queries_to_blast' '$__tool_directory__/queries_to_test/Unknown_specie.fa' &&
            bash '$__tool_directory__/generate_blast_folder.sh' Unknown_specie.fa > queries_description.txt &&
            mv queries_description.txt '$__tool_directory__/queries_to_test/' &&
        #end if

        ## Prepare the pre-calculated data set; the miRNAture call below reads it
        ## from uncompress/Dataset/ inside the tool directory.
        bash '$__tool_directory__/create_dataset.sh' '$dataset' '$__tool_directory__' &&

        miRNAture
        -stage '$stage'
        ## The model list is optional: only pass -sublist when a file was given.
        #if $use_subset and $activate_subset.subset_models:
            -sublist '$activate_subset.subset_models'
        #end if
        -nbitscore_cut '$nbitscore'
        -dataF '$__tool_directory__/uncompress/Dataset/'
        -specie_genome '$specie_genome'
        -specie_name '$specie_name'
        -specie_tag '$specie_tag'
        -pe '$parallel_linux_select'
        -workdir 'output/'
        ## The "final" mode is always appended to the selected homology modes.
        -m "${",".join(map(str, $homology_mode)) + ",final"}"
        #if $use_blast:
            ## "ALL" is always appended to the selected blast strategies.
            #set $joined_str=",".join(map(str, $blast_specific.blast_strategy))
            #set $complete_str=($joined_str + ",ALL")
            -strategy '$complete_str'
            -blastq '$__tool_directory__/queries_to_test/'
        #end if
        -rep '$repeat_filter' > '$std_output' &&
        rm -rf '$__tool_directory__/uncompress/'
        ]]></command>

    <inputs>
        <!-- Target genome or sequences in FASTA format.
             NOTE(review): multiple="true" renders several datasets as one
             comma-separated path list; confirm miRNAture accepts that. -->
        <param argument="-specie_genome" format="fasta" multiple="true" type="data"
            label="Input genome or sequence"
            help="Input sequence to be processed by miRNAture" />

        <!-- Parallel execution flag; the validator only accepts the value 1 -->
        <param argument="-pe" name="parallel_linux_select" type="integer" value="1" label="Parallel running" help="Test Parallel Linux">
            <validator type="in_range" message="Please set this option to 1." min="1" max="1"/>
        </param>

        <!-- Free-text identifiers of the analysed species and of the experiment -->
        <param argument="-specie_name" type="text" label="Scientific species name, formatted as Genera_specie" />
        <param argument="-specie_tag" type="text" label="Species tag used to identify the output files of this experiment" />

        <!-- Normalised bitscore cut-off, restricted to the range 0 to 1 -->
        <param argument="-nbitscore_cut" name="nbitscore" type="float" label="nBitscore"
            value="1" min="0" max="1" help="Control bitscore threshold to filter Rfam candidates" />

        <!-- Each option value is the complete argument string handed to -rep -->
        <param argument="-rep" name="repeat_filter" type="select" label="Repeats filter" help="Repetition cutoff">
            <option value="relax,150,100">relax</option>
            <option value="default,200,100">default</option>
        </param>

        <!-- Optional restriction of the search to a user-supplied list of models -->
        <conditional name="activate_subset">
            <param argument="-sublist" name="subset_models_des" type="select" label="Subset default miRNA families" help="Restrict the search to a selection of specific miRNA families">
                <option value="0">No</option>
                <option value="1">Yes</option>
            </param>
            <when value="0">
            </when>
            <when value="1">
                <param format="txt" multiple="true" name="subset_models" type="data"
                    label="List of miRNA models to be searched"
                    help="Text file listing the miRNA models to be searched on the input sequences" optional="true"/>
            </when>
        </conditional>

        <!-- Archive with the pre-calculated data required by miRNAture -->
        <param argument="-dataF" format="gz" multiple="true" name="dataset" type="data"
            label="Input Pre-calculated data"
            help="Please submit the pre-calculated data to run miRNAture (see https://doi.org/10.5281/zenodo.7180160)" />

        <!-- Pipeline stage to run -->
        <param argument="-stage" type="select" label="Stages to run">
            <option value="complete">complete</option>
            <option value="homology">homology</option>
            <option value="no_homology">no_homology</option>
            <option value="validation">validation</option>
            <option value="evaluation">evaluation</option>
            <option value="summarise">summarise</option>
        </param>

        <!-- Homology search modes. The command template iterates over the
             selection, so at least one mode has to be chosen. -->
        <param argument="-mode" name="homology_mode" type="select" multiple="true" optional="false" label="Homology mode">
            <option value="blast">blast</option>
            <option value="rfam">rfam</option>
            <option value="mirbase">mirbase</option>
            <option value="hmm">hmm</option>
        </param>

        <!-- Blast searches with user-provided query sequences -->
        <conditional name="blast_specific">
            <param name="blast_searches" type="select" label="Use the blast mode to search annotated queries in your target genome?" help="Activate this option to run the blast mode with the provided query sequences">
                <option value="0">No</option>
                <option value="1">Yes</option>
            </param>
            <when value="0">
            </when>
            <when value="1">
                <!-- The command template iterates over the selection, so at
                     least one strategy has to be chosen. -->
                <param argument="-strategy" name="blast_strategy" type="select" multiple="true" optional="false" label="Select one or more blast strategies">
                    <option value="1">1</option>
                    <option value="2">2</option>
                    <option value="3">3</option>
                    <option value="4">4</option>
                    <option value="5">5</option>
                    <option value="6">6</option>
                    <option value="7">7</option>
                    <option value="8">8</option>
                    <option value="9">9</option>
                </param>
                <!-- NOTE(review): the command copies this input with a single
                     cp call, so selecting more than one dataset is unlikely to
                     work; confirm whether multiple="true" is intended. -->
                <param argument="-blstq" format="fasta" multiple="true" name="queries_to_blast" type="data"
                    label="Query sequences"
                    help="Query sequences to search in target genome" />
            </when>
        </conditional>
    </inputs>

<outputs>
    <!-- Standard output of the miRNAture call (redirected by the command) -->
    <data name="std_output" format="txt" label="Standard output miRNAture" />

    <!-- Files collected from the working directory when stage is "homology" -->
    <data name="report0" format="txt" label="Homology output miRNAture">
        <filter>stage == "homology" </filter>
        <discover_datasets directory="output/" pattern="(?P&lt;designation&gt;.+)\.yaml" ext="yaml" visible="true"/>
        <discover_datasets directory="output/miRNA_prediction/Final_Candidates" pattern="(?P&lt;designation&gt;.+)\.gff3" ext="gff3" visible="true"/>
        <discover_datasets directory="output/miRNA_prediction/Final_Candidates" pattern="(?P&lt;designation&gt;.+)\.txt\.db" visible="true"/>
    </data>

    <!-- Files collected from the working directory when stage is "complete" -->
    <data name="report1" format="txt" label="Complete output miRNAture">
        <filter>stage == "complete" </filter>
        <discover_datasets directory="output/" pattern="(?P&lt;designation&gt;.+)\.yaml" ext="yaml" visible="true"/>
        <discover_datasets directory="output/Final_miRNA_evaluation" pattern="(?P&lt;designation&gt;.+)\.gff3" ext="gff3" visible="true"/>
        <discover_datasets directory="output/Final_miRNA_evaluation" pattern="(?P&lt;designation&gt;.+)\.txt" ext="txt" visible="true"/>
        <discover_datasets directory="output/Final_miRNA_evaluation/Fasta" pattern="(?P&lt;designation&gt;.+)\.fasta" ext="fasta" visible="true"/>
    </data>
</outputs>
<tests>
    <!-- Homology stage in hmm mode, restricted to the models listed in fam.txt -->
    <test>
        <!-- Input data -->
        <param name="specie_genome" value="test.fasta"/>
        <param name="dataset" value="Dataset_mirnature_tutorial.tar.gz"/>
        <conditional name="activate_subset">
            <param name="subset_models_des" value="1"/>
            <param name="subset_models" value="fam.txt"/>
        </conditional>
        <!-- Species identifiers -->
        <param name="specie_name" value="Test_specie"/>
        <param name="specie_tag" value="Test"/>
        <!-- Run configuration -->
        <param name="stage" value="homology"/>
        <param name="homology_mode" value="hmm"/>
        <param name="nbitscore" value="1.0"/>
        <param name="parallel_linux_select" value="1"/>
        <!-- NOTE(review): "relax" is the display text of the option whose value
             is "relax,150,100"; confirm the test framework resolves it -->
        <param name="repeat_filter" value="relax"/>
        <!-- Expected standard output -->
        <output name="std_output" file="test.txt"/>
    </test>
</tests>

<help><![CDATA[
    
**miRNAture** detects *bona fide* miRNA candidates through sequence homology
searches and validation steps using structural alignments with
pre-defined or/and modified miRNA-specific covariance models. The
miRNAture pipeline is composed of three modules: (1) Homology search
operating on miRNA precursors, (2) prediction of the positioning of
mature miRNAs within the precursor mature annotation, and (3) an
Evaluation scheme designed to identify false positive miRNA annotations.
This multi-stage approach generates annotation files in BED/GFF3 format for
the precursors and the detected mature regions, together with the
corresponding FASTA files. It also produces a summary file reporting the MFE,
precursor length and number of loci of each annotated miRNA family.

AUTHORS:
    *Cristian A. Velandia Huerto*, *Joerg Fallmann* and *Peter F. Stadler*

USAGE:
    ./miRNAture [-options]

OPTIONS:
    -h/-help    Print this documentation.

    -blstq/-blastQueriesFolder <PATH>
                Path of blast query sequences in FASTA format to be searched
                on the subject sequence.

    -dataF/-datadir <PATH>
                Path to pre-calculated data directory containing RFAM and
                miRBase covariance, hidden markov models, and necessary
                files to run MIRfix.

    -m/-mode <blast,hmm,rfam,mirbase>
                Homology search modes: blast, hmm, rfam, mirbase, and/or infernal.
                Each mode can be run individually, but this Galaxy version always
                appends the *final* option so that the results of the selected
                modes are merged.

    -rep/-repetition_cutoff <relax,Number_Loci,Candidates_to_evaluate>
                Sets the maximum number of loci that will be evaluated by
                the mature annotation stage. By default, miRNAture detects
                miRNA families that report a high number of loci (> 200
                loci). It then selects the top 100 candidates in terms of
                alignment scores as candidates for the validation stage
                (default,200,100). These values can be modified in the
                command line version with the following flag value:
                'relax,Number_Loci,Candidates_to_evaluate'. This option
                allows the user to select the threshold values used to
                detect repetitive families. The first parameter is <relax>,
                which tells miRNAture to change the default configuration.
                The next one, <Number_Loci>, is the threshold on the number
                of loci used to classify a family as repetitive. The last
                one, <Candidates_to_evaluate>, is the number of candidates
                that will be evaluated in the next evaluation section. The
                remaining candidates are included as homology 'potential'
                candidates. This Galaxy version offers two presets:
                'relax' (relax,150,100) and 'default' (default,200,100).

    -str/-strategy <1,2,3,4,5,6,7,8,9,10>
                This flag is specific to the blast mode. It selects the blast
                strategies used to search for miRNAs and must be given
                together with the blast mode (-m blast). This Galaxy version
                offers strategies 1 to 9 and always appends *ALL* to the
                selection.

    -stg/-stage <'homology','no_homology','validation','evaluation','summarise','complete'>
                Selects the stage to be run on miRNAture. The options are:
                'homology', 'no_homology', 'validation', 'evaluation',
                'summarise' or 'complete'.

    -speG/-specie_genome <PATH>
                Path of target sequences to be analyzed in FASTA format.

    -speN/-specie_name <Genera_specie>
                Scientific name of the species or sequence source. The
                format must be: *Genera_specie*, separated by '_'.

    -speT/-specie_tag <TAG_NAME>
                Experiment tag. Will help to identify the generated files
                along miRNA output files.

    -sublist/-subset_models <FILE_WITH_CM_NAMES>
                Target list of CMs to be searched on subject
                genome/sequences. If not indicated, miRNAture will run all
                RFAM v14.4 metazoan miRNA models.

    -w/-workdir <OUT_PATH>
                Working directory path to write all miRNAture results.

BUGS, CAVEATS, COMPLAINTS or DONATIONS
    Write directly to cristian at bioinf.uni-leipzig.de

    ]]></help>
<expand macro="citations" />
</tool>