Mercurial > repos > artbio > small_rna_maps
view small_rna_maps.xml @ 13:458a88317c22 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit c2fde4ca142c58719749c04046270479485e8d4b
author | artbio |
---|---|
date | Tue, 10 Apr 2018 06:12:01 -0400 |
parents | d33263e6e812 |
children | cd75c72e1d75 |
line wrap: on
line source
<!--
    Galaxy wrapper for small_rna_maps.
    The command indexes every input BAM with samtools, runs small_rna_maps.py
    to write one or two tabular dataframes, then runs small_rna_maps.r to
    render those dataframes into a single PDF.
-->
<tool id="small_rna_maps" name="small_rna_maps" version="2.5.1">
    <description></description>
    <!--
        NOTE(review): the command calls the samtools executable, which is not
        declared below. Presumably it is pulled in as a dependency of the
        pinned pysam package; confirm before changing the pins.
    -->
    <requirements>
        <requirement type="package" version="1.11.2=py27_0">numpy</requirement>
        <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement>
        <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>
        <requirement type="package" version="0.6_28=r3.3.2_0">r-latticeextra</requirement>
        <requirement type="package" version="2.2.1=r3.3.2_0">r-gridextra</requirement>
        <requirement type="package" version="1.4.2=r3.3.2_0">r-reshape2</requirement>
    </requirements>
    <stdio>
        <exit_code range="1:" level="fatal" description="Tool exception" />
    </stdio>
    <!--
        The three shell steps are AND-chained (shell double ampersand) so that
        a failure of an early step aborts the job with a non-zero exit code.
        With a plain semicolon only the exit status of the final Rscript call
        would be reported to Galaxy.
    -->
    <command detect_errors="exit_code"><![CDATA[
        ## Step 1: index every BAM of the repeat block.
        #for $file in $series
            samtools index '$file.inputs' &&
        #end for

        ## Step 2: compute the dataframe(s). Inputs and sample names are passed
        ## in the same order as the repeat block.
        python '$__tool_directory__'/small_rna_maps.py
            --inputs
            #for $file in $series
                '$file.inputs'
            #end for
            --sample_names
            #for $sample in $series
                '$sample.inputs.name'
            #end for
            --minsize $minsize
            --maxsize $maxsize
            --cluster $cluster
            #if str($plots_options.plots_options_selector) == "two_plot":
                --plot_methods '${plots_options.first_plot}' '${plots_options.extra_plot}'
                --outputs '$output_tab' '$extra_output_tab'
            #elif str($plots_options.plots_options_selector) == "global":
                --plot_methods 'Size'
                --outputs '$output_tab'
            #else:
                --plot_methods '${plots_options.first_plot}'
                --outputs '$output_tab'
            #end if
        &&

        ## Step 3: plot. The normalization factors are joined into a single
        ## space-separated, single-quoted argument.
        #set $norm = ""
        #for $file in $series
            #set $norm += str($file.normalization)+' '
        #end for
        ## Drop the trailing space added by the loop above.
        #set $norm = $norm[:-1]
        Rscript '$__tool_directory__'/small_rna_maps.r
            --first_dataframe '$output_tab'
            --extra_dataframe '$extra_output_tab'
            --normalization '$norm'
            #if str($ylimits_cond.ylimits) == "no":
                --ymin '' --ymax ''
            #else:
                --ymin '${ylimits_cond.ymin}' --ymax '${ylimits_cond.ymax}'
            #end if
            #if str($plots_options.plots_options_selector) == "two_plot":
                --first_plot_method '${plots_options.first_plot}'
                --extra_plot_method '${plots_options.extra_plot}'
            #elif str($plots_options.plots_options_selector) == "global":
                --first_plot_method 'Size'
                --extra_plot_method ''
                --global '${plots_options.mergestrands}'
            #else:
                --first_plot_method '${plots_options.first_plot}'
                --extra_plot_method ''
            #end if
            --output_pdf '$output_pdf'
    ]]></command>
    <inputs>
        <!-- One repeat entry per BAM file, each with its own normalization factor. -->
        <repeat name="series" title="Input" min="1">
            <param name="inputs" type="data" format="bam" label="Select a single alignment file to parse" multiple="false"
                   help="If 'multiple datasets' method is used, the same normalization factor will be applied to each dataset. Please refer to the help section for more information." />
            <param name="normalization" type="float" label="Enter a size/normalization factor."
                   help="Enter only one value e.g. '0.75' or '1.23'" value="1.0"/>
        </repeat>
        <param name="minsize" type="integer" label="Minimal size of reads for inclusion in analysis"
               value="0" help="default value: 0" />
        <param name="maxsize" type="integer" label="Maximal size of reads for inclusion in analysis"
               value="10000" help="default value: 10000" />
        <param name="cluster" type="integer" label="Aggregation distance in nucleotides"
               value="0" help="If not 0, sets the distance (in nt) below which data are clustered to a single median position" />
        <!-- Selects between one plot, two plots, or the global size distribution. -->
        <conditional name="plots_options">
            <param name="plots_options_selector" type="select" display="radio" label="Plot Options">
                <option value="one_plot">Just one plot per chromosome</option>
                <option value="two_plot" selected="True">Two plots per chromosome</option>
                <option value="global">Global read size distributions of aligned reads</option>
            </param>
            <when value="two_plot">
                <param name="first_plot" type="select" display="radio" label="Select the type of the top plot">
                    <option value="Counts">Counts</option>
                    <option value="Coverage">Coverage</option>
                    <option value="Mean">Mean Sizes</option>
                    <option value="Median">Median Sizes</option>
                    <option value="Size">Size Distributions</option>
                </param>
                <param name="extra_plot" type="select" display="radio" label="Select the type of the bottom plot">
                    <option value="Counts">Counts</option>
                    <option value="Coverage">Coverage</option>
                    <option value="Mean">Mean Sizes</option>
                    <option value="Median">Median Sizes</option>
                    <option value="Size">Size Distributions</option>
                </param>
            </when>
            <when value="one_plot">
                <param name="first_plot" type="select" display="radio" label="select the type of plot">
                    <option value="Counts">Counts</option>
                    <option value="Coverage">Coverage</option>
                    <option value="Mean">Mean Sizes</option>
                    <option value="Median">Median Sizes</option>
                    <option value="Size">Size Distributions</option>
                </param>
            </when>
            <when value="global">
                <!-- Hidden so that the output label can still reference first_plot in this branch. -->
                <param name="first_plot" type="hidden" value="Size"/>
                <param name="mergestrands" type="select" display="radio" label="Whether forward and reverse aligned reads should be merged or not in the histogram">
                    <option value="nomerge">Do not merge</option>
                    <option value="merge">Merge forward and reverse reads</option>
                </param>
            </when>
        </conditional>
        <!-- Optional y axis limits; when unset the command passes empty strings for both limits. -->
        <conditional name="ylimits_cond">
            <param name="ylimits" type="boolean" truevalue="yes" falsevalue="no" checked="false"
                   label="Do you wish to set an y axis limit to the plots?"
                   help="This limit won't be applied to size distribution plots"/>
            <when value="yes">
                <param name="ymin" type="float" label="Enter minimum value" value="0.0" help="e.g. '-5.0'"/>
                <param name="ymax" type="float" label="Enter maximum value" value="0.0" help="e.g. '5.0'"/>
            </when>
            <when value="no">
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data format="tabular" name="output_tab" label="$plots_options.first_plot dataframe" />
        <!-- The second dataframe only exists when two plots were requested. -->
        <data format="tabular" name="extra_output_tab" label="$plots_options.extra_plot dataframe">
            <filter>plots_options['plots_options_selector'] == 'two_plot'</filter>
        </data>
        <data format="pdf" name="output_pdf" label="small RNA maps" />
    </outputs>
    <tests>
        <!-- Single input, one plot (Counts), explicit y limits. -->
        <test>
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="cluster" value="0" />
            <param name="ylimits" value="yes" />
            <param name="ymin" value="-5" />
            <param name="ymax" value="5" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Counts" />
            <output file="input1_counts_yminneg5_5.tab" name="output_tab" />
            <output file="input1_yminneg5_5_single_plot_counts.pdf" name="output_pdf" />
        </test>
        <!-- Single input, one plot (Coverage), clustering distance of 5. -->
        <test>
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="cluster" value="5" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Coverage" />
            <output file="input1_coverage_cluster5.tab" name="output_tab" />
            <output file="input1_cluster5_single_plot_coverage.pdf" name="output_pdf" />
        </test>
        <!-- Single input, one plot (Size), read sizes restricted to 20-30. -->
        <test>
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="20" />
            <param name="maxsize" value="30" />
            <param name="cluster" value="0" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Size" />
            <output file="input1_min20_max30_size.tab" name="output_tab" />
            <output file="input1_min20_max30_single_plot_size.pdf" name="output_pdf" />
        </test>
        <!-- Single input, one plot (Mean). -->
        <test>
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="cluster" value="0" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Mean" />
            <output file="input1_mean.tab" name="output_tab" />
            <output file="input1__single_plot_mean.pdf" name="output_pdf" />
        </test>
        <!-- Single input, one plot (Median). -->
        <test>
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="cluster" value="0" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Median" />
            <output file="input1_median.tab" name="output_tab" />
            <output file="input1_single_plot_median.pdf" name="output_pdf" />
        </test>
        <!-- Two inputs with different normalization factors, one plot (Counts). -->
        <test>
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input2.bam" ftype="bam" />
                <param name="normalization" value="2.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="cluster" value="0" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Counts" />
            <output file="input1_input2_norm_1_2_counts.tab" name="output_tab" />
            <output file="input1_input2_norm_1_2_single_plot_counts.pdf" name="output_pdf" />
        </test>
        <!-- Two inputs, two plots (Counts over Size), explicit y limits. -->
        <test>
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input2.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="cluster" value="0" />
            <param name="ylimits" value="yes" />
            <param name="ymin" value="-5" />
            <param name="ymax" value="5" />
            <param name="plots_options_selector" value="two_plot" />
            <param name="first_plot" value="Counts" />
            <param name="extra_plot" value="Size" />
            <output file="input1_input2_counts.tab" name="output_tab" />
            <output file="input1_input2_size.tab" name="extra_output_tab" />
            <output file="input1_input2_double_plot_counts_size_ylimneg5_5.pdf" name="output_pdf" />
        </test>
        <!-- The same single-chromosome input six times, one plot (Coverage). -->
        <test>
            <repeat name="series">
                <param name="inputs" value="input_single_chr.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input_single_chr.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input_single_chr.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input_single_chr.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input_single_chr.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input_single_chr.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="cluster" value="0" />
            <param name="plots_options_selector" value="one_plot" />
            <param name="first_plot" value="Coverage" />
            <output file="input_single_chr_x_6_single_plot_coverage.tab" name="output_tab" />
            <output file="input_single_chr_x_6_single_plot_coverage.pdf" name="output_pdf" />
        </test>
        <!-- Two inputs, global size distribution, strands kept separate. -->
        <test>
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input2.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="cluster" value="0" />
            <param name="plots_options_selector" value="global" />
            <param name="mergestrands" value="nomerge" />
            <param name="first_plot" value="Size" />
            <output file="size.tab" name="output_tab" />
            <output file="global_nomerge.pdf" name="output_pdf" />
        </test>
        <!-- Two inputs, global size distribution, strands merged. -->
        <test>
            <repeat name="series">
                <param name="inputs" value="input1.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <repeat name="series">
                <param name="inputs" value="input2.bam" ftype="bam" />
                <param name="normalization" value="1.0" />
            </repeat>
            <param name="minsize" value="0" />
            <param name="maxsize" value="10000" />
            <param name="cluster" value="0" />
            <param name="plots_options_selector" value="global" />
            <param name="mergestrands" value="merge" />
            <param name="first_plot" value="Size" />
            <output file="size.tab" name="output_tab" />
            <output file="global_merge.pdf" name="output_pdf" />
        </test>
    </tests>
    <help>

**What it does**

Plots mapping statistics of an alignment along the reference chromosomes :

- counts
- mean sizes
- median sizes
- coverage depth
- size distribution

Read counts, mean sizes and median sizes are computed by counting the number of 5' end of reads in each position of a chromosome reference.
Coverage depths are computed from the input bam alignment files using the python pysam module.

The metrics mentioned above can be plotted either separately:

.. image:: one_plot.png

Or in all possible pairwise combinations:

.. image:: two_plot.png

For comparison purposes, values from bam alignment files can be normalized by a size factor before plotting.

**Inputs**

bam alignment files that must be

- single-read
- sorted
- mapped to the same reference

To plot 2 alignment files in the same PDF output the 'single dataset' method should be used.

.. class:: warningmark

If the 'multiple dataset' method is used the normalization factor will be applied to every file selected in the input list.
Additionally each file in the selected list will be plotted in a separate PDF file.

**Output**

A pdf file generated by the R package lattice and one or two dataframes used to plot the data.

    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btp352</citation>
        <citation type="bibtex">@Book{,
            title = {Lattice: Multivariate Data Visualization with R},
            author = {Deepayan Sarkar},
            publisher = {Springer},
            address = {New York},
            year = {2008},
            note = {ISBN 978-0-387-75968-5},
            url = {http://lmdvr.r-forge.r-project.org},
        }</citation>
    </citations>
</tool>