view small_rna_maps.xml @ 11:a561a71bd7d7 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit c24bbb6d53574eb1c1eb8d219cf2a39a9ed5b3ff
author artbio
date Tue, 06 Mar 2018 06:11:55 -0500
parents c3fb2a864526
children d33263e6e812
line wrap: on
line source

<tool id="small_rna_maps" name="small_rna_maps" version="2.4.3">
  <description></description>
  <requirements>
        <!-- Conda packages, each pinned to an exact version and build string:
             py27 builds of the Python libraries and r3.3.2 builds of the R libraries. -->
        <!-- NOTE(review): the command section also calls the samtools executable, which is
             not declared here; presumably it is provided alongside pysam. TODO confirm. -->
        <requirement type="package" version="1.11.2=py27_0">numpy</requirement>
        <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement>
        <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>
        <requirement type="package" version="0.6_28=r3.3.2_0">r-latticeextra</requirement>
        <requirement type="package" version="2.2.1=r3.3.2_0">r-gridextra</requirement>
        <requirement type="package" version="1.4.2=r3.3.2_0">r-reshape2</requirement>
        
  </requirements>
  <stdio>
      <!-- Any exit status of 1 or above is reported as a fatal "Tool exception". -->
      <!-- NOTE(review): the command element also sets detect_errors="exit_code"; the two
           settings look redundant. Confirm which one the Galaxy version in use honours. -->
      <exit_code level="fatal" range="1:" description="Tool exception"/>
  </stdio>
  <!-- Builds the job command line in three chained steps: index every BAM input, run
       small_rna_maps.py to write the tabular dataframe(s), then run small_rna_maps.r to
       render those dataframes into the PDF output. -->
  <command detect_errors="exit_code"><![CDATA[
      ## Work on job-local symlinks so that "samtools index" writes each .bai file into the
      ## job working directory instead of next to the original (possibly read-only) dataset.
      #for $i, $file in enumerate($inputs)
          ln -sf '$file' 'input_${i}.bam' &&
          samtools index 'input_${i}.bam' &&
      #end for
      ## Evaluate the plot mode once; it drives the options of both scripts below.
      #set $plot_mode = str($plots_options.plots_options_selector)
      python '$__tool_directory__'/small_rna_maps.py
          --inputs
      #for $i, $file in enumerate($inputs)
          'input_${i}.bam'
      #end for
          --sample_names
      #for $sample in $inputs
          ## Dataset names are free text. An apostrophe would terminate the single-quoted
          ## shell argument, so it is replaced by an underscore; other names are unchanged.
          #set $sample_label = str($sample.name).replace("'", "_")
          '$sample_label'
      #end for
          --minsize $minsize
          --maxsize $maxsize
          --cluster $cluster
      #if $plot_mode == "two_plot"
          --plot_methods '${plots_options.first_plot}' '${plots_options.extra_plot}'
          --outputs '$output_tab' '$extra_output_tab' &&
      #elif $plot_mode == "global"
          --plot_methods 'Size'
          --outputs '$output_tab' &&
      #else
          --plot_methods '${plots_options.first_plot}'
          --outputs '$output_tab' &&
      #end if
      Rscript '$__tool_directory__'/small_rna_maps.r
          --first_dataframe '$output_tab'
          --extra_dataframe '$extra_output_tab'
          --normalization '$normalization'
      #if $plot_mode == "two_plot"
          --first_plot_method '${plots_options.first_plot}'
          --extra_plot_method '${plots_options.extra_plot}'
      #elif $plot_mode == "global"
          --first_plot_method 'Size'
          --extra_plot_method ''
          --global '${plots_options.mergestrands}'
      #else
          --first_plot_method '${plots_options.first_plot}'
          --extra_plot_method ''
      #end if
          --output_pdf '$output_pdf'
  ]]></command>
 <inputs>
    <!-- One or more BAM alignments; every selected dataset is indexed and parsed. -->
    <param name="inputs" type="data" format="bam" label="Select multiple alignments to parse" multiple="True"/>
    <!-- Free-text list of per-sample factors, handed unchanged to the R script. -->
    <param name="normalization" type="text" label="Enter size/normalization factors as a space-separated list. Leave blank for no normalization (default)"
           help="e.g. '1 0.75 1.23'. Values of the list map to the above selected files from bottom to top" />
    <!-- Read sizes and distances cannot be negative, so the integers below are bounded at 0. -->
    <param name="minsize" type="integer" min="0" label="Minimal size of reads for inclusion in analysis"
           value="0" help="default value: 0" />
    <param name="maxsize" type="integer" min="0" label="Maximal size of reads for inclusion in analysis"
           value="10000" help="default value: 10000" />
    <param name="cluster" type="integer" min="0" label="aggregation distance in nucleotides"
           value="0" help="if not 0, sets the distance (in nt) below which data are clustered to a single median position" />
    <!-- Plot layout: the selected mode decides which plot-type parameters exist and
         whether a second dataframe output is kept. -->
    <conditional name="plots_options">
        <param name="plots_options_selector" type="select" display="radio" label="Plot Options">
            <option value="one_plot">Just one plot per chromosome</option>
            <option value="two_plot" selected="True">Two plots per chromosome</option>
            <option value="global">Global read size distributions of aligned reads</option>
        </param>
        <!-- Two plots: both plot types are chosen independently. -->
        <when value="two_plot">
            <param name="first_plot" type="select" display="radio" label="Select the type of the first plot">
                <option value="Counts">Counts</option>
                <option value="Coverage">Coverage</option>
                <option value="Mean">Mean Sizes</option>
                <option value="Median">Median Sizes</option>
                <option value="Size">Size Distributions</option>
            </param>
            <param name="extra_plot" type="select" display="radio" label="Select the type of the second plot">
                <option value="Counts">Counts</option>
                <option value="Coverage">Coverage</option>
                <option value="Mean">Mean Sizes</option>
                <option value="Median">Median Sizes</option>
                <option value="Size">Size Distributions</option>
            </param>
        </when>
        <!-- One plot: only the first plot type is chosen. -->
        <when value="one_plot">
            <param name="first_plot" type="select" display="radio" label="select the type of the first plot">
                <option value="Counts">Counts</option>
                <option value="Coverage">Coverage</option>
                <option value="Mean">Mean Sizes</option>
                <option value="Median">Median Sizes</option>
                <option value="Size">Size Distributions</option>
            </param>
        </when>
        <!-- Global mode: the plot type is fixed to Size through a hidden parameter, which
             keeps the first_plot reference in the output label resolvable. -->
        <when value="global">
            <param name="first_plot" type="hidden" value="Size"/>
            <param name="mergestrands" type="select" display="radio" label="Whether forward and reverse aligned reads should be merged or not in the histogram">
                <option value="nomerge">Do not merge</option>
                <option value="merge">Merge forward and reverse reads</option>
            </param>
        </when>
    </conditional>
 </inputs>

 <outputs>
    <!-- Dataframe behind the first (or only) plot; always produced. -->
    <data name="output_tab" format="tabular" label="$plots_options.first_plot dataframe"/>
    <!-- Dataframe behind the second plot; kept only in two_plot mode. -->
    <data name="extra_output_tab" format="tabular" label="$plots_options.extra_plot dataframe">
        <filter>plots_options['plots_options_selector'] == 'two_plot'</filter>
    </data>
    <!-- PDF written by the R script from the dataframe(s) above. -->
    <data name="output_pdf" format="pdf" label="small RNA maps"/>
 </outputs>

    <tests>
        <!-- Every test lists its parameters in the same order as the inputs section and
             nests the plot settings inside their plots_options conditional. -->

        <!-- one_plot, Counts, cluster distance 5 -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="5"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="one_plot"/>
                <param name="first_plot" value="Counts"/>
            </conditional>
            <output name="output_tab" file="count_cluster_5.tab"/>
            <output name="output_pdf" file="count_cluster_5.pdf"/>
        </test>
        <!-- global, strands kept separate -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="global"/>
                <param name="first_plot" value="Size"/>
                <param name="mergestrands" value="nomerge"/>
            </conditional>
            <output name="output_tab" file="size.tab"/>
            <output name="output_pdf" file="global_nomerge.pdf"/>
        </test>
        <!-- global, strands merged -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="global"/>
                <param name="first_plot" value="Size"/>
                <param name="mergestrands" value="merge"/>
            </conditional>
            <output name="output_tab" file="size.tab"/>
            <output name="output_pdf" file="global_merge.pdf"/>
        </test>
        <!-- global, strands merged, single input, sizes 20 to 30 -->
        <test>
            <param name="inputs" value="input1.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="20"/>
            <param name="maxsize" value="30"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="global"/>
                <param name="first_plot" value="Size"/>
                <param name="mergestrands" value="merge"/>
            </conditional>
            <output name="output_tab" file="size20-30.tab"/>
            <output name="output_pdf" file="global_merge_20-30.pdf"/>
        </test>
        <!-- two_plot, Counts + Mean -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="two_plot"/>
                <param name="first_plot" value="Counts"/>
                <param name="extra_plot" value="Mean"/>
            </conditional>
            <output name="output_tab" file="count.tab"/>
            <output name="extra_output_tab" file="mean.tab"/>
            <output name="output_pdf" file="count-mean.pdf"/>
        </test>
        <!-- two_plot, Counts + Mean, inputs in reverse order, sizes 20 to 30 -->
        <test>
            <param name="inputs" value="input2.bam,input1.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="20"/>
            <param name="maxsize" value="30"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="two_plot"/>
                <param name="first_plot" value="Counts"/>
                <param name="extra_plot" value="Mean"/>
            </conditional>
            <output name="output_tab" file="count20-30.tab"/>
            <output name="extra_output_tab" file="mean20-30.tab"/>
            <output name="output_pdf" file="count-mean-20-30.pdf"/>
        </test>
        <!-- two_plot, Counts + Mean, the same input given twice -->
        <test>
            <param name="inputs" value="input1.bam,input1.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="two_plot"/>
                <param name="first_plot" value="Counts"/>
                <param name="extra_plot" value="Mean"/>
            </conditional>
            <output name="output_tab" file="doubled_count.tab"/>
            <output name="extra_output_tab" file="doubled_mean.tab"/>
            <output name="output_pdf" file="doubled_count-mean.pdf"/>
        </test>
        <!-- two_plot, Counts + Median -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="two_plot"/>
                <param name="first_plot" value="Counts"/>
                <param name="extra_plot" value="Median"/>
            </conditional>
            <output name="output_tab" file="count.tab"/>
            <output name="extra_output_tab" file="median.tab"/>
            <output name="output_pdf" file="count-median.pdf"/>
        </test>
        <!-- two_plot, Counts + Coverage -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="two_plot"/>
                <param name="first_plot" value="Counts"/>
                <param name="extra_plot" value="Coverage"/>
            </conditional>
            <output name="output_tab" file="count.tab"/>
            <output name="extra_output_tab" file="coverage.tab"/>
            <output name="output_pdf" file="count-coverage.pdf"/>
        </test>
        <!-- two_plot, Counts + Size -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="two_plot"/>
                <param name="first_plot" value="Counts"/>
                <param name="extra_plot" value="Size"/>
            </conditional>
            <output name="output_tab" file="count.tab"/>
            <output name="extra_output_tab" file="size.tab"/>
            <output name="output_pdf" file="count-size.pdf"/>
        </test>
        <!-- two_plot, Size + Counts -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="two_plot"/>
                <param name="first_plot" value="Size"/>
                <param name="extra_plot" value="Counts"/>
            </conditional>
            <output name="output_tab" file="size.tab"/>
            <output name="extra_output_tab" file="count.tab"/>
            <output name="output_pdf" file="size-count.pdf"/>
        </test>
        <!-- two_plot, Size + Counts, normalization factors 1 and 0.75 -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/>
            <param name="normalization" value="1 0.75"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="two_plot"/>
                <param name="first_plot" value="Size"/>
                <param name="extra_plot" value="Counts"/>
            </conditional>
            <output name="output_tab" file="size.tab"/>
            <output name="extra_output_tab" file="count.tab"/>
            <output name="output_pdf" file="size-count_normed.pdf"/>
        </test>
        <!-- two_plot, Counts + Coverage, input_single_chr.bam -->
        <test>
            <param name="inputs" value="input_single_chr.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="two_plot"/>
                <param name="first_plot" value="Counts"/>
                <param name="extra_plot" value="Coverage"/>
            </conditional>
            <output name="output_tab" file="count_1_chr.tab"/>
            <output name="extra_output_tab" file="coverage_1_chr.tab"/>
            <output name="output_pdf" file="counts_coverage_1.pdf"/>
        </test>
        <!-- two_plot, Counts + Coverage, input_5_chr.bam -->
        <test>
            <param name="inputs" value="input_5_chr.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="two_plot"/>
                <param name="first_plot" value="Counts"/>
                <param name="extra_plot" value="Coverage"/>
            </conditional>
            <output name="output_tab" file="count_5_chr.tab"/>
            <output name="extra_output_tab" file="coverage_5_chr.tab"/>
            <output name="output_pdf" file="counts_coverage_5.pdf"/>
        </test>
        <!-- one_plot, Counts, input_single_chr.bam -->
        <test>
            <param name="inputs" value="input_single_chr.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="one_plot"/>
                <param name="first_plot" value="Counts"/>
            </conditional>
            <output name="output_tab" file="count_1_chr.tab"/>
            <output name="output_pdf" file="count_1.pdf"/>
        </test>
        <!-- one_plot, Counts -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="one_plot"/>
                <param name="first_plot" value="Counts"/>
            </conditional>
            <output name="output_tab" file="count.tab"/>
            <output name="output_pdf" file="count.pdf"/>
        </test>
        <!-- one_plot, Size -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="one_plot"/>
                <param name="first_plot" value="Size"/>
            </conditional>
            <output name="output_tab" file="size.tab"/>
            <output name="output_pdf" file="size.pdf"/>
        </test>
        <!-- one_plot, Coverage -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/>
            <param name="normalization" value="1 1"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="one_plot"/>
                <param name="first_plot" value="Coverage"/>
            </conditional>
            <output name="output_tab" file="coverage.tab"/>
            <output name="output_pdf" file="coverage.pdf"/>
        </test>
        <!-- one_plot, Coverage, normalization factors 1 and 0.2 -->
        <test>
            <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/>
            <param name="normalization" value="1 0.2"/>
            <param name="minsize" value="0"/>
            <param name="maxsize" value="10000"/>
            <param name="cluster" value="0"/>
            <conditional name="plots_options">
                <param name="plots_options_selector" value="one_plot"/>
                <param name="first_plot" value="Coverage"/>
            </conditional>
            <output name="output_tab" file="coverage.tab"/>
            <output name="output_pdf" file="coverage_normed.pdf"/>
        </test>
    </tests>


<help>

**What it does**

Plots maps of (1) read counts, (2) mean sizes, (3) median sizes, (4) coverage depth or (5)
read size distributions along chromosome references.

Mean sizes and median sizes are the mean and the median sizes, respectively, of all reads
whose 5' ends map to a given coordinate in a chromosome reference.
Coverage depths are computed from the input bam alignment files using the python pysam module.

The variables mentioned above (1-5) can be plotted either separately or in all possible
pairwise combinations.

For comparison purposes, values from bam alignment files can be normalized by a size factor
before plotting. If the normalization field is left blank, a default normalization factor
of 1 is assumed.

**Inputs**

bam alignment files that must be

  - single-read
  - sorted
  - mapping to the same reference
  
Optionally, a space-separated list of normalization/size factors may be added before plotting.
This list maps to the selected bam alignments from bottom to top.

**Output**

A pdf file generated by the R package lattice

One or two dataframes used to plot data

</help>

<citations>
    <!-- NOTE(review): the DOI below is presumably the reference for samtools, which the
         command section calls. Confirm. -->
    <citation type="doi">10.1093/bioinformatics/btp352</citation>
    <!-- Reference for the R lattice graphics book. The entry carries an explicit citation
         key because BibTeX entries need one to be citable and to merge into a .bib file. -->
    <citation type="bibtex">@Book{sarkar2008lattice,
    title = {Lattice: Multivariate Data Visualization with R},
    author = {Deepayan Sarkar},
    publisher = {Springer},
    address = {New York},
    year = {2008},
    note = {ISBN 978-0-387-75968-5},
    url = {http://lmdvr.r-forge.r-project.org},
  }</citation>
</citations>
</tool>