Mercurial > repos > cavelandiah > mirnature
changeset 0:4394f98e705e draft default tip
"planemo upload for repository https://github.com/Bierinformatik/miRNAture/tree/galaxy_add/Galaxy/miRNAture commit 47a893683a9445abddce77c28f43d098b62cf385"
author | cavelandiah |
---|---|
date | Sun, 27 Nov 2022 22:00:05 +0000 |
parents | |
children | |
files | create_dataset.sh generate_blast_folder.sh macros.xml mirnature.xml test-data/Dataset_mirnature_tutorial.tar.gz test-data/fam.txt test-data/genome.fasta test-data/test.fasta |
diffstat | 8 files changed, 303 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env bash
# create_dataset.sh
#
# Unpack a tar archive of pre-calculated data and expose its top-level
# directory under a fixed name.
#
# Usage: create_dataset.sh <archive> <target_dir>
#   <archive>     tar archive (optionally compressed) holding one top-level
#                 directory
#   <target_dir>  directory in which "uncompress/" is created
#
# Result: <target_dir>/uncompress/Dataset holds the archive's top-level
# directory; a copy of the archive is kept as <target_dir>/uncompress/data.gz.
# Exits non-zero if any step fails.

set -euo pipefail

if [ "$#" -ne 2 ]; then
    echo "Usage: $(basename -- "$0") <archive> <target_dir>" >&2
    exit 1
fi

input=$1
dir=$2
work_dir="$dir/uncompress"

# -p: do not fail when the directory is left over from a previous run.
mkdir -p "$work_dir"
cp -- "$input" "$work_dir/data.gz"

# Top-level directory = first path component of the first archive member.
# A leading "./" is stripped so archives created with "tar -c ./dir" work too.
# sed/awk consume the whole listing, so tar never receives SIGPIPE and
# "pipefail" cannot trigger spuriously.
Dir_name=$(tar -tf "$work_dir/data.gz" \
    | sed -e 's|^\./||' -e '/^$/d' \
    | awk -F/ 'NR == 1 { print $1 }')

if [ -z "$Dir_name" ]; then
    echo "Could not determine the top-level directory of '$input'" >&2
    exit 1
fi

# Drop a stale result first; otherwise "mv" would nest the new directory
# inside the old one.
rm -rf -- "$work_dir/Dataset"
tar -xf "$work_dir/data.gz" --directory "$work_dir"

# Renaming a directory onto itself is an error, so skip it when the archive
# already uses the expected name.
if [ "$Dir_name" != "Dataset" ]; then
    mv -- "$work_dir/$Dir_name" "$work_dir/Dataset"
fi
#!/usr/bin/env bash
# generate_blast_folder.sh
#
# Print the one-line description of a blast query FASTA file.
#
# Usage: generate_blast_folder.sh <fasta_file>
#
# Output (stdout): "<file name> miRNA <species>", for example
#   Anolis_carolinensis.fa miRNA Anolis carolinensis
# The species is not known here, so the fixed text "Unknown specie" is used.

set -eu

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    echo "Usage: $(basename -- "$0") <fasta_file>" >&2
    exit 1
fi

fasta_file=$1

# Quote the path so names containing spaces or glob characters stay intact.
base_name=$(basename -- "$fasta_file")

printf '%s miRNA Unknown specie\n' "$base_name"
<!-- macros.xml: tokens and macros imported by mirnature.xml -->
<macros>
    <!-- Version of the mirnature package; reused by the requirement below so
         the two values cannot drift apart. -->
    <token name="@TOOL_VERSION@">1.1</token>
    <!-- NOTE(review): presumably the wrapper revision used as the
         "+galaxyN" suffix of the tool version; confirm against mirnature.xml. -->
    <token name="@VERSION_SUFFIX@">0</token>
    <!-- Shell prefix that sets "addthreads" to GALAXY_SLOTS (default 1) minus one. -->
    <token name="@ADDTHREADS@"><![CDATA[
        ##compute the number of ADDITIONAL threads to be used (--cpu)
        addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) &&
    ]]></token>
    <!-- Command-line fragment that passes the value computed by @ADDTHREADS@. -->
    <token name="@CPU@">
        --cpu \$addthreads
    </token>
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="20220922">parallel</requirement>
            <requirement type="package" version="@TOOL_VERSION@">mirnature</requirement>
            <yield/>
        </requirements>
    </xml>
    <xml name="citations">
        <citations>
            <citation type="doi">10.3390/genes12030348</citation>
            <yield />
        </citations>
    </xml>
</macros>
<!-- mirnature.xml: Galaxy tool wrapper for miRNAture.
     The version attribute is assembled from the tokens defined in macros.xml. -->
<tool id="mirnature" name="miRNAture" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5" profile="21.05">
    <description>Computational detection of canonical microRNAs</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="aggressive"><![CDATA[
        ##Last data path from: https://doi.org/10.5281/zenodo.7180160
        ##
        ## All intermediate files are staged in the job working directory.
        ## The tool directory is shared by concurrent jobs and may be
        ## read-only, so nothing is written to (or removed from) it.
        ##
        ## Select values are compared as strings: comparing the parameter
        ## wrapper with an integer does not match.
        mkdir -p output &&
        #if str($blast_specific.blast_searches) == "1":
            mkdir -p queries_to_test &&
            ## The query input accepts several datasets: merge them into one file.
            cat
            #for $query in $blast_specific.queries_to_blast:
                '$query'
            #end for
            > queries_to_test/Unknown_specie.fa &&
            bash '$__tool_directory__/generate_blast_folder.sh' Unknown_specie.fa > queries_to_test/queries_description.txt &&
        #end if

        ## Unpacks the data archive into ./uncompress/Dataset
        bash '$__tool_directory__/create_dataset.sh' '$dataset' . &&

        miRNAture
            -stage '$stage'
            #if str($activate_subset.subset_models_des) == "1" and $activate_subset.subset_models:
                -sublist '$activate_subset.subset_models'
            #end if
            -nbitscore_cut '$nbitscore'
            -dataF "\$(pwd)/uncompress/Dataset/"
            -specie_genome '$specie_genome'
            -specie_name '$specie_name'
            -specie_tag '$specie_tag'
            -pe '$parallel_linux_select'
            -workdir 'output/'
            -m "${",".join(map(str, $homology_mode)) + ",final"}"
            #if str($blast_specific.blast_searches) == "1":
                #set $joined_str = ",".join(map(str, $blast_specific.blast_strategy))
                #set $complete_str = $joined_str + ",ALL"
                -strategy '$complete_str'
                -blastq "\$(pwd)/queries_to_test/"
            #end if
            -rep '$repeat_filter' > '$std_output' &&
        rm -rf uncompress/
    ]]></command>

    <inputs>
        <!--File-->
        <!-- NOTE(review): declared multiple="true" but passed to miRNAture as a
             single quoted value; confirm that several genomes are supported. -->
        <param argument="-specie_genome" format="fasta" multiple="true" type="data"
               label="Input genome or sequence"
               help="Input sequence to be processed by miRNAture" />

        <!--Hidden-->
        <param argument="-pe" name="parallel_linux_select" type="integer" value="1" label="Parallel running" help="Test Parallel Linux" >
            <validator type="in_range" message="Please set this option to 1." min="1" max="1"/>
        </param>

        <!--TEXT-->
        <param argument="-specie_name" type="text" label="Scientific specie name as: Genera_specie" />
        <param argument="-specie_tag" type="text" label="Species tag to identify through experiment" />

        <!--NUM-->
        <param argument="-nbitscore_cut" name="nbitscore" type="float" label="nBitscore"
               value="1" min="0" max="1" help="Control bitscore threshold to filter Rfam candidates" />

        <param argument="-rep" name="repeat_filter" type="select" label="Repeats filter" help="Repetition cutoff" >
            <option value="relax,150,100">relax</option>
            <option value="default,200,100">default</option>
        </param>

        <!--Binary-->
        <conditional name="activate_subset">
            <!--Binary-->
            <param argument="-sublist" name="subset_models_des" type="select" label="Subset default miRNA families" help="Make a selection of specific miRNA families to be search">
                <option value="0">No</option>
                <option value="1">Yes</option>
            </param>
            <when value="0">
            </when>
            <when value="1">
                <param format="txt" multiple="true" name="subset_models" type="data"
                       label="List of miRNA models to be searched"
                       help="Input sequence to be processed by miRNAture" optional="true"/>
            </when>
        </conditional>

        <!-- NOTE(review): declared multiple="true" but the command unpacks a
             single archive; confirm whether several archives are intended. -->
        <param argument="-dataF" format="gz" multiple="true" name="dataset" type="data"
               label="Input Pre-calculated data"
               help="Please submit the pre-calculated data to run miRNAture (see https://doi.org/10.5281/zenodo.7180160)" />

        <!--Static list-->
        <param argument="-stage" type="select" label="Stages to run">
            <option value="complete">complete</option>
            <option value="homology">homology</option>
            <option value="no_homology">no_homology</option>
            <option value="validation">validation</option>
            <option value="evaluation">evaluation</option>
            <option value="summarise">summarise</option>
        </param>
        <!--Check Boxes-->
        <param argument="-mode" name="homology_mode" type="select" multiple="true" label="Homology mode">
            <option value="blast">blast</option>
            <option value="rfam">rfam</option>
            <option value="mirbase">mirbase</option>
            <option value="hmm">hmm</option>
        </param>

        <conditional name="blast_specific">
            <param name="blast_searches" type="select" label="Use the blast mode to search annotated queries in your target genome?" help="Activate this option if wanted to use blast mode with provided sequences">
                <option value="0">No</option>
                <option value="1">Yes</option>
            </param>
            <when value="0">
            </when>
            <when value="1">
                <param argument="-strategy" name="blast_strategy" type="select" multiple="true" label="Select one or more blast strategy(ies)">
                    <option value="1">1</option>
                    <option value="2">2</option>
                    <option value="3">3</option>
                    <option value="4">4</option>
                    <option value="5">5</option>
                    <option value="6">6</option>
                    <option value="7">7</option>
                    <option value="8">8</option>
                    <option value="9">9</option>
                </param>
                <param argument="-blstq" format="fasta" multiple="true" name="queries_to_blast" type="data"
                       label="Query sequences"
                       help="Query sequences to search in target genome" />
            </when>
        </conditional>
    </inputs>

    <outputs>
        <data name="std_output" format="txt" label="Standard output miRNAture" />
        <!-- "<" and ">" of the named group are escaped: a literal "<" is not
             allowed inside an XML attribute value. -->
        <data format="txt" name="report0" label="Homology output miRNAture">
            <filter>stage == "homology" </filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.yaml" directory="output/" ext="yaml" visible="true"/>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.gff3" directory="output/miRNA_prediction/Final_Candidates" ext="gff3" visible="true" />
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt\.db" directory="output/miRNA_prediction/Final_Candidates" visible="true" />
        </data>
        <data format="txt" name="report1" label="Complete output miRNAture">
            <filter>stage == "complete" </filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.yaml" directory="output/" ext="yaml" visible="true"/>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.gff3" directory="output/Final_miRNA_evaluation" ext="gff3" visible="true" />
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt" directory="output/Final_miRNA_evaluation" ext="txt" visible="true" />
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fasta" directory="output/Final_miRNA_evaluation/Fasta" ext="fasta" visible="true" />
        </data>
    </outputs>
    <tests>
        <test>
            <param name="stage" value="homology"/>
            <conditional name="activate_subset">
                <param name="subset_models_des" value="1"/>
                <param name="subset_models" value="fam.txt"/>
            </conditional>
            <param name="nbitscore" value="1.0"/>
            <param name="dataset" value="Dataset_mirnature_tutorial.tar.gz"/>
            <param name="specie_genome" value="test.fasta"/>
            <param name="specie_name" value="Test_specie"/>
            <param name="specie_tag" value="Test"/>
            <param name="homology_mode" value="hmm"/>
            <param name="parallel_linux_select" value="1"/>
            <param name="repeat_filter" value="relax"/>
            <!-- NOTE(review): test.txt is not among the files listed for this
                 changeset; confirm it exists in test-data. -->
            <output name="std_output" file="test.txt"/>
        </test>
    </tests>

    <help><![CDATA[

**miRNAture** detects *bona fide* miRNA candidates through sequence homology
searches and validation steps using structural alignments with
pre-defined or/and modified miRNA-specific covariance models. The
miRNAture pipeline is composed of three modules: (1) Homology search
operating on miRNA precursors, (2) prediction of the positioning of
mature miRNAs within the precursor mature annotation, and (3) an
Evaluation scheme designed to identify false positive miRNA annotations.
This multi-stage approach generates annotation files in BED/GFF3 from
precursors and detected mature regions and corresponding FASTA files. At
the same time, a summary file with the MFE, precursor length and number
of loci of each annotated miRNA family.

AUTHORS:
    *Cristian A. Velandia Huerto*, *Joerg Fallmann* and *Peter F. Stadler*

USAGE:
    ./miRNAture [-options]

OPTIONS:
    -h/-help    Print this documentation.

    -blstq/-blastQueriesFolder <PATH>
        Path of blast query sequences in FASTA format to be searched
        on the subject sequence.

    -dataF/-datadir <PATH>
        Path to pre-calculated data directory containing RFAM and
        miRBase covariance, hidden markov models, and necessary
        files to run MIRfix.

    -m/-mode <blast,hmm,rfam,mirbase>
        Homology search modes: blast, hmm, rfam, mirbase, and/or infernal.
        It is possible to perform individual analysis, but in this Galaxy
        version is always included the *final* option to merge multiple results.

    -rep/-repetition_cutoff <relax,Number_Loci,Candidates_to_evaluate>
        Setup number of maximum loci number that will be evaluated
        by the mature's annotation stage. By default, miRNAture will
        detect miRNA families that report high number of loci (> 200
        loci). Then, it will select the top 100 candidates in terms
        of alignment scores, as candidates for the validation stage
        (default,200,100). The designed values could be modified by
        the following flag in the command line version:
        'relax,Number_Loci,Candidates_to_evaluate'. This option
        allows to the user to select the threshold values to detect
        repetitive families. The first parameter is <relax>, which
        tells miRNAture to change the default configuration. The
        next one, <Number_Loci> is the threshold of loci number to
        classify a family as repetitive. The last one,
        <Candidates_to_evaluate>, is the number of candidates prone
        to be evaluated in the next evaluation section. The rest
        candidates are included as homology 'potential' candidates.
        Selected option for this Galaxy version is set as:
        <relax,150,100>.

    -str/-strategy <1,2,3,4,5,6,7,8,9,10>
        This flag is blast mode specific. It corresponds to blast
        strategies that would be used to search miRNAs. It must be
        indicated along with -m *Blast* flag.

    -stg/-stage <'homology','no_homology','validation','evaluation','summarise','complete'>
        Selects the stage to be run on miRNAture. The options are:
        'homology', 'no_homology', 'validation', 'evaluation',
        'summarise' or 'complete'.

    -speG/-specie_genome <PATH>
        Path of target sequences to be analyzed in FASTA format.

    -speN/-specie_name <Genera_specie>
        Specie or sequence source's scientific name. The format must
        be: *Genera_specie*, separated by '_'.

    -speT/-specie_tag <TAG_NAME>
        Experiment tag. Will help to identify the generated files
        along miRNA output files.

    -sublist/-subset_models <FILE_WITH_CM_NAMES>
        Target list of CMs to be searched on subject
        genome/sequences. If not indicated, miRNAture will run all
        RFAM v14.4 metazoan miRNA models.

    -w/-workdir <OUT_PATH>
        Working directory path to write all miRNAture results.

BUGS, CAVEATS, COMPLAINS or DONATIONS
    Write directly to cristian at bioinf.uni-leipzig.de

    ]]></help>
    <expand macro="citations" />
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fam.txt Sun Nov 27 22:00:05 2022 +0000 @@ -0,0 +1,2 @@ +RF00027 +MIPF0000002
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.fasta Sun Nov 27 22:00:05 2022 +0000 @@ -0,0 +1,2 @@ +>scaf1 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTAGGTTGAGGTAGTAGGTTGTATAGTTTAGAATTACATCAAGGGAGATAACTGTACAGCCTCCTAGCTTTCCTGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG