changeset 0:4394f98e705e draft default tip

"planemo upload for repository https://github.com/Bierinformatik/miRNAture/tree/galaxy_add/Galaxy/miRNAture commit 47a893683a9445abddce77c28f43d098b62cf385"
author cavelandiah
date Sun, 27 Nov 2022 22:00:05 +0000
parents
children
files create_dataset.sh generate_blast_folder.sh macros.xml mirnature.xml test-data/Dataset_mirnature_tutorial.tar.gz test-data/fam.txt test-data/genome.fasta test-data/test.fasta
diffstat 8 files changed, 303 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/create_dataset.sh	Sun Nov 27 22:00:05 2022 +0000
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# Unpack tarball $1 into $2/uncompress and expose its top-level directory as $2/uncompress/Dataset.
+set -e  # stop at the first failing step instead of continuing with a half-built dataset
+input=$1
+dir=$2
+mkdir -p "$dir/uncompress"
+cp "$input" "$dir/uncompress/data.gz"
+Dir_name=$(tar -tf "$dir/uncompress/data.gz" | head -1 | cut -f1 -d"/")  # first path component of the first archive entry
+tar -xf "$dir/uncompress/data.gz" --directory "$dir/uncompress/"
+mv "$dir/uncompress/${Dir_name}" "$dir/uncompress/Dataset"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/generate_blast_folder.sh	Sun Nov 27 22:00:05 2022 +0000
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+# Print one description line for the FASTA file given as $1: "<file name> miRNA Unknown specie".
+fasta_file=$1
+# Example of a description line: Anolis_carolinensis.fa  miRNA   Anolis carolinensis
+base_name=$(basename -- "$fasta_file")
+echo "$base_name miRNA Unknown specie"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sun Nov 27 22:00:05 2022 +0000
@@ -0,0 +1,24 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.1</token> <!-- also used as the version of the mirnature package required below -->
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@ADDTHREADS@"><![CDATA[
+        ##compute the number of ADDITIONAL threads to be used (--cpu)
+        addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) &&
+    ]]></token>
+    <token name="@CPU@">
+      --cpu \$addthreads
+  </token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="20220922">parallel</requirement>
+            <requirement type="package" version="@TOOL_VERSION@">mirnature</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.3390/genes12030348</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirnature.xml	Sun Nov 27 22:00:05 2022 +0000
@@ -0,0 +1,257 @@
+<tool id="mirnature" name="miRNAture" version="1.1+galaxy0" python_template_version="3.5" profile="21.05">
+    <description>Computational detection of canonical microRNAs</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="aggressive"><![CDATA[
+        ##Last data path from: https://doi.org/10.5281/zenodo.7180160
+        ##Scratch data (queries_to_test/, uncompress/) is kept in the job working directory because
+        ##the tool directory can be read-only and is shared by every concurrent job.
+        ##Select values are compared as strings; nested inputs are addressed through their conditional.
+        mkdir -p output &&
+        #if str($blast_specific.blast_searches) == "1":
+            mkdir -p queries_to_test &&
+            cp '$blast_specific.queries_to_blast' queries_to_test/Unknown_specie.fa &&
+            bash '$__tool_directory__/generate_blast_folder.sh' Unknown_specie.fa > queries_description.txt  &&
+            mv queries_description.txt queries_to_test/ &&
+        #end if

+        bash '$__tool_directory__/create_dataset.sh' '$dataset' "\$PWD" &&

+        miRNAture 
+        -stage '$stage' 
+        #if str($activate_subset.subset_models_des) == "1":
+            -sublist '$activate_subset.subset_models'  
+        #end if
+        -nbitscore_cut '$nbitscore'
+        -dataF "\$PWD/uncompress/Dataset/"
+        -specie_genome '$specie_genome' 
+        -specie_name '$specie_name'
+        -specie_tag '$specie_tag' 
+        -pe '$parallel_linux_select'
+        -workdir 'output/'
+        -m "${",".join(map(str, $homology_mode)) + ",final"}"
+        #if str($blast_specific.blast_searches) == "1":
+            #set $joined_str=",".join(map(str, $blast_specific.blast_strategy)) 
+            #set $complete_str=($joined_str + ",ALL") 
+            -strategy '$complete_str' 
+            -blastq "\$PWD/queries_to_test/" 
+        #end if
+        -rep '$repeat_filter' > '$std_output' &&
+        rm -rf uncompress/
+        ]]></command>
+
+    <inputs>
+        <!-- Target sequence(s), passed to miRNAture as -specie_genome -->
+        <param argument="-specie_genome" format="fasta" multiple="true" type="data"
+            label="Input genome or sequence"
+            help="Input sequence to be processed by miRNAture" />
+        <!-- Visible integer field; the validator below only accepts the value 1 -->
+        <param argument="-pe" name="parallel_linux_select" type="integer" value="1" label="Parallel running" help="Test Parallel Linux" >
+            <validator type="in_range" message="Please set this option to 1." min="1" max="1"/>
+        </param>

+        <!-- Free-text identifiers -->
+        <param argument="-specie_name" type="text" label="Scientific specie name as: Genera_specie" />
+        <param argument="-specie_tag" type="text" label="Species tag to identify through experiment" />

+        <!-- Numeric threshold, restricted to the range 0..1 -->
+        <param argument="-nbitscore_cut" name="nbitscore" type="float" label="nBitscore" 
+            value="1" min="0" max="1" help="Control bitscore threshold to filter Rfam candidates" /> 

+        <param argument="-rep" name="repeat_filter" type="select" label="Repeats filter" help="Repetition cutoff" >
+            <option value="relax,150,100">relax</option>
+            <option value="default,200,100">default</option>
+        </param>

+        <!-- Optional restriction of the search to a user-supplied list of miRNA models -->
+        <conditional name="activate_subset">
+            <!-- Yes/No switch; the value 1 reveals the list input below -->
+            <param argument="-sublist" name="subset_models_des" type="select" label="Subset default miRNA families" help="Make a selection of specific miRNA families to be searched">
+                <option value="0">No</option>
+                <option value="1">Yes</option>
+            </param>
+            <when value="0">
+            </when>
+            <when value="1">
+                <param format="txt" multiple="true" name="subset_models" type="data"
+                    label="List of miRNA models to be searched"
+                    help="File with the names of the covariance models (CMs) to be searched, e.g. RF00027" optional="true"/>
+            </when>
+        </conditional>

+        <param argument="-dataF" format="gz" multiple="true" name="dataset" type="data"
+            label="Input Pre-calculated data"
+            help="Please submit the pre-calculated data to run miRNAture (see https://doi.org/10.5281/zenodo.7180160)" />

+        <!-- Single choice: pipeline stage passed to -stage -->
+        <param argument="-stage" type="select" label="Stages to run">
+            <option value="complete">complete</option>
+            <option value="homology">homology</option>
+            <option value="no_homology">no_homology</option>
+            <option value="validation">validation</option>
+            <option value="evaluation">evaluation</option>
+            <option value="summarise">summarise</option>
+        </param>
+        <!-- Multiple choice: homology search modes -->
+        <param argument="-mode" name="homology_mode" type="select" multiple="true" label="Homology mode">
+            <option value="blast">blast</option>
+            <option value="rfam">rfam</option>
+            <option value="mirbase">mirbase</option>
+            <option value="hmm">hmm</option>
+        </param>

+        <conditional name="blast_specific">
+            <param name="blast_searches" type="select" label="Use the blast mode to search annotated queries in your target genome?" help="Activate this option if wanted to use blast mode with provided sequences">
+                <option value="0">No</option>
+                <option value="1">Yes</option>
+            </param>
+            <when value="0">
+            </when>
+            <when value="1">
+                <param argument="-strategy" name="blast_strategy" type="select" multiple="true" label="Select one or more blast strategy(ies)">
+                    <option value="1">1</option>
+                    <option value="2">2</option>
+                    <option value="3">3</option>
+                    <option value="4">4</option>
+                    <option value="5">5</option>
+                    <option value="6">6</option>
+                    <option value="7">7</option>
+                    <option value="8">8</option>
+                    <option value="9">9</option>
+                </param>
+                <param argument="-blstq" format="fasta" multiple="true" name="queries_to_blast" type="data"
+                    label="Query sequences"
+                    help="Query sequences to search in target genome" />
+            </when>
+        </conditional>
+</inputs>
+
+<outputs>
+    <data label="Standard output miRNAture" format="txt" name="std_output" /> <!-- receives the stdout of the miRNAture call -->
+    <data label="Homology output miRNAture" name="report0" format="txt">
+        <filter>stage == "homology" </filter>
+        <discover_datasets directory="output/" pattern="(?P&lt;designation&gt;.+)\.yaml" ext="yaml" visible="true"/>
+        <discover_datasets directory="output/miRNA_prediction/Final_Candidates" pattern="(?P&lt;designation&gt;.+)\.gff3" ext="gff3" visible="true" />
+        <discover_datasets directory="output/miRNA_prediction/Final_Candidates" pattern="(?P&lt;designation&gt;.+)\.txt\.db" visible="true" />
+    </data>
+    <data label="Complete output miRNAture" name="report1" format="txt">
+        <filter>stage == "complete" </filter>
+        <discover_datasets directory="output/" pattern="(?P&lt;designation&gt;.+)\.yaml" ext="yaml" visible="true"/>
+        <discover_datasets directory="output/Final_miRNA_evaluation" pattern="(?P&lt;designation&gt;.+)\.gff3" ext="gff3" visible="true" />
+        <discover_datasets directory="output/Final_miRNA_evaluation" pattern="(?P&lt;designation&gt;.+)\.txt" ext="txt" visible="true" />
+        <discover_datasets directory="output/Final_miRNA_evaluation/Fasta" pattern="(?P&lt;designation&gt;.+)\.fasta" ext="fasta" visible="true" />
+    </data>
+</outputs>
+<tests>
+    <test> <!-- homology stage, hmm mode, restricted to the models listed in fam.txt -->
+        <param name="specie_genome" value="test.fasta"/>
+        <param name="specie_name" value="Test_specie"/>
+        <param name="specie_tag" value="Test"/>
+        <param name="dataset" value="Dataset_mirnature_tutorial.tar.gz"/>
+        <param name="stage" value="homology"/>
+        <param name="homology_mode" value="hmm"/>
+        <param name="nbitscore" value="1.0"/>
+        <param name="repeat_filter" value="relax"/>
+        <param name="parallel_linux_select" value="1"/>
+        <conditional name="activate_subset">
+            <param name="subset_models_des" value="1"/>
+            <param name="subset_models" value="fam.txt"/>
+        </conditional>
+        <output name="std_output" file="test.txt"/> <!-- NOTE(review): test.txt is not in this changeset's file list; confirm it exists in test-data -->
+    </test>
+</tests>
+
+<help><![CDATA[
+    
+**miRNAture** detects *bona fide* miRNA candidates through sequence homology
+searches and validation steps using structural alignments with
+pre-defined or/and modified miRNA-specific covariance models. The
+miRNAture pipeline is composed of three modules: (1) Homology search
+operating on miRNA precursors, (2) prediction of the positioning of
+mature miRNAs within the precursor mature annotation, and (3) an
+Evaluation scheme designed to identify false positive miRNA annotations.
+This multi-stage approach generates annotation files in BED/GFF3 from
+precursors and detected mature regions and corresponding FASTA files. It
+also writes a summary file with the MFE, precursor length and number
+of loci of each annotated miRNA family.
+
+AUTHORS:
+    *Cristian A. Velandia Huerto*, *Joerg Fallmann* and *Peter F. Stadler*
+
+USAGE:
+    ./miRNAture [-options]
+
+OPTIONS:
+    -h/-help    Print this documentation.
+
+    -blstq/-blastQueriesFolder <PATH>
+                Path of blast query sequences in FASTA format to be searched
+                on the subject sequence.
+
+    -dataF/-datadir <PATH>
+                Path to pre-calculated data directory containing RFAM and
+                miRBase covariance, hidden markov models, and necessary
+                files to run MIRfix.
+
+    -m/-mode <blast,hmm,rfam,mirbase>
+                Homology search modes: blast, hmm, rfam, mirbase, and/or infernal. 
+                Each mode can be run on its own, but this Galaxy version
+                always appends the *final* option to merge multiple results.
+
+    -rep/-repetition_cutoff <relax,Number_Loci,Candidates_to_evaluate>
+                Setup number of maximum loci number that will be evaluated
+                by the mature's annotation stage. By default, miRNAture will
+                detect miRNA families that report high number of loci (> 200
+                loci). Then, it will select the top 100 candidates in terms
+                of alignment scores, as candidates for the validation stage
+                (default,200,100). The designed values could be modified by
+                the following flag in the command line version:
+                'relax,Number_Loci,Candidates_to_evaluate'. This option
+                allows the user to select the threshold values to detect
+                repetitive families. The first parameter is <relax>, which
+                tells miRNAture to change the default configuration. The
+                next one, <Number_Loci> is the threshold of loci number to
+                classify a family as repetitive. The last one,
+                <Candidates_to_evaluate>, is the number of candidates prone
+                to be evaluated in the next evaluation section. The remaining
+                candidates are included as homology 'potential' candidates.
+                This Galaxy version offers two presets:
+                <relax,150,100> and <default,200,100>.
+
+    -str/-strategy <1,2,3,4,5,6,7,8,9,10>
+                This flag is blast mode specific. It corresponds to blast
+                strategies that would be used to search miRNAs. It must be
+                indicated along with -m *Blast* flag.
+
+    -stg/-stage <'homology','no_homology','validation','evaluation','summarise','complete'>
+                Selects the stage to be run on miRNAture. The options are:
+                'homology', 'no_homology', 'validation', 'evaluation',
+                'summarise' or 'complete'.
+
+    -speG/-specie_genome <PATH>
+                Path of target sequences to be analyzed in FASTA format.
+
+    -speN/-specie_name <Genera_specie>
+                Specie or sequence source's scientific name. The format must
+                be: *Genera_specie*, separated by '_'.
+
+    -speT/-specie_tag <TAG_NAME>
+                Experiment tag. Will help to identify the generated files
+                along miRNA output files.
+
+    -sublist/-subset_models <FILE_WITH_CM_NAMES>
+                Target list of CMs to be searched on subject
+                genome/sequences. If not indicated, miRNAture will run all
+                RFAM v14.4 metazoan miRNA models.
+
+    -w/-workdir <OUT_PATH>
+                Working directory path to write all miRNAture results.
+
+BUGS, CAVEATS, COMPLAINTS or DONATIONS
+    Write directly to cristian at bioinf.uni-leipzig.de
+
+    ]]></help>
+<expand macro="citations" />
+</tool>
Binary file test-data/Dataset_mirnature_tutorial.tar.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fam.txt	Sun Nov 27 22:00:05 2022 +0000
@@ -0,0 +1,2 @@
+RF00027
+MIPF0000002
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome.fasta	Sun Nov 27 22:00:05 2022 +0000
@@ -0,0 +1,2 @@
+>scaf1
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTAGGTTGAGGTAGTAGGTTGTATAGTTTAGAATTACATCAAGGGAGATAACTGTACAGCCTCCTAGCTTTCCTGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.fasta	Sun Nov 27 22:00:05 2022 +0000
@@ -0,0 +1,2 @@
+>hsa-let-7a-2 MI0000061
+AGGTTGAGGTAGTAGGTTGTATAGTTTAGAATTACATCAAGGGAGATAACTGTACAGCCTCCTAGCTTTCCT