view signature.xml @ 12:aa5e2c64dff8 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6806c0677e53d52164707faeb36947987f5c500a
author artbio
date Sat, 22 Oct 2022 23:49:52 +0000
parents 8d3ca9652a5b
children 124f404b0fe7
line wrap: on
line source

<tool id="signature" name="Small RNA Signatures" version="3.4.1">
    <!-- Galaxy tool wrapper: runs signature.py and then signature.r (see the command section). -->
    <description />
    <!-- Package dependencies, each pinned to an exact version.
         Presumably numpy and pysam serve signature.py and the r-* packages serve
         signature.r; confirm against the two scripts. -->
    <requirements>
        <requirement type="package" version="1.23.4">numpy</requirement>
        <requirement type="package" version="0.18.0">pysam</requirement>
        <requirement type="package" version="1.7.3">r-optparse</requirement>
        <requirement type="package" version="0.6_30">r-latticeextra</requirement>
        <requirement type="package" version="2.3">r-gridextra</requirement>
    </requirements>
    <!-- Error handling: any exit code in the range "1:" is reported as a fatal
         "Tool exception". -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Tool exception" />
    </stdio>
      <!-- Command pipeline:
             1. symlink the BAM index and the BAM input into the working directory
                as input.bam.bai and input.bam;
             2. run signature.py on input.bam to write the h and z data frames;
             3. run signature.r on those data frames to draw the PDF.
           Every dataset path is single-quoted so that unusual characters in a path
           cannot split or alter the shell command, and both links use "ln -f -s"
           so that an already existing link name does not abort the job. -->
      <command detect_errors="exit_code"><![CDATA[
        ln -f -s '$input.metadata.bam_index' input.bam.bai &&
        ln -f -s '$input' input.bam &&
        python '$__tool_directory__'/signature.py
           --input input.bam
           --minquery '$minquery'
           --maxquery '$maxquery'
           --mintarget '$mintarget'
           --maxtarget '$maxtarget'
           --minscope '$minscope'
           --maxscope '$maxscope'
           --output_h '$h_dataframe'
           --output_z '$z_dataframe' &&
        Rscript '$__tool_directory__'/signature.r
           --h_dataframe '$h_dataframe'
           --z_dataframe '$z_dataframe'
           --plot_method '$plot_method'
           --pdf '$pdf'
           --title "Overlap Signatures of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs"
    ]]></command>
    <!-- User parameters: one BAM dataset, the size ranges (min/max) of the query and
         target small RNA classes, the range of overlaps to analyse, and the plot
         layout. Each help string gives an example that matches the parameter's
         default value. -->
    <inputs>
        <param format="bam" label="Compute signature from this bowtie standard output" name="input" type="data" />
        <param help="'23' = 23 nucleotides" label="Min size of query small RNAs" name="minquery" size="3" type="integer" value="23" />
        <param help="'29' = 29 nucleotides" label="Max size of query small RNAs" name="maxquery" size="3" type="integer" value="29" />
        <param help="'23' = 23 nucleotides" label="Min size of target small RNAs" name="mintarget" size="3" type="integer" value="23" />
        <param help="'29' = 29 nucleotides" label="Max size of target small RNAs" name="maxtarget" size="3" type="integer" value="29" />
        <param help="'1' = 1 nucleotide overlap" label="Minimal relative overlap analyzed" name="minscope" size="3" type="integer" value="1" />
        <param help="'26' = 26 nucleotides overlap" label="Maximal relative overlap analyzed" name="maxscope" size="3" type="integer" value="26" />
        <param help="Signature can be computed globally or by item present in the alignment file" label="Graph type" name="plot_method" type="select">
            <option selected="True" value="global">Global</option>
            <option value="lattice">Lattice</option>
        </param>
    </inputs>
    <!-- Outputs: two tabular data frames (z-signature and h-signature), each with
         its column_names metadata set by an action, plus one PDF. -->
    <outputs>
        <data format="tabular" label="z-signature data frame" name="z_dataframe">
            <actions>
                <action name="column_names" type="metadata" default="Chromosome,Overlap,Number_of_pairs,z-score" />
            </actions>
        </data>
        <data format="tabular" label="h-signature data frame" name="h_dataframe">
            <actions>
                <action name="column_names" type="metadata" default="Chromosome,Overlap,overlap_probability,z-score" />
            </actions>
        </data>
        <data format="pdf" label="Overlap probabilities" name="pdf" />
    </outputs>
    <!-- Two functional tests on the same BAM input and the same size and overlap
         settings. They differ only in plot_method (global, then lattice) and in the
         expected PDF; both expect the same h.tab and z.tab. -->
    <tests>
        <test>
            <param ftype="bam" name="input" value="sr_bowtie.bam" />
            <param name="minquery" value="23" />
            <param name="maxquery" value="29" />
            <param name="mintarget" value="23" />
            <param name="maxtarget" value="29" />
            <param name="minscope" value="5" />
            <param name="maxscope" value="15" />
            <param name="plot_method" value="global" />
            <output file="h.tab" ftype="tabular" name="h_dataframe" />
            <output file="z.tab" ftype="tabular" name="z_dataframe" />
            <output file="global.pdf" ftype="pdf" name="pdf" />
        </test>
        <test>
            <param ftype="bam" name="input" value="sr_bowtie.bam" />
            <param name="minquery" value="23" />
            <param name="maxquery" value="29" />
            <param name="mintarget" value="23" />
            <param name="maxtarget" value="29" />
            <param name="minscope" value="5" />
            <param name="maxscope" value="15" />
            <param name="plot_method" value="lattice" />
            <output file="h.tab" ftype="tabular" name="h_dataframe" />
            <output file="z.tab" ftype="tabular" name="z_dataframe" />
            <output file="lattice.pdf" ftype="pdf" name="pdf" />
        </test>
    </tests>
    <help>

**What it does**

Compute small RNA (piRNA, siRNA, ...) signatures.

This tool computes (i) the number of pairs of **aligned** reads by overlap classes (in nt) and associated z-scores,
and (ii) the ping-pong signal (Brennecke et al. Science. 2008;322: 1387–1392) and associated z-scores.

**Note** that the number of pairs of aligned reads is distinct from the number of pairs of reads
when these reads can be aligned at multiple positions in the genome. The two values are equal only
when the analysis is restricted to uniquely mapping reads.

Options set the min and max size of both the query small RNA class and the target small RNA class,
the range of overlaps (in nt) over which to compute the signatures, and whether the signatures should be reported at
genome-wide level or by item (chromosomes, genes, etc.). For details on computational algorithms
for piRNA and siRNA signatures, see `Antoniewski (2014)`_.

.. _Antoniewski (2014): https://link.springer.com/protocol/10.1007%2F978-1-4939-0931-5_12

**Input**

A **sorted** BAM alignment file.

**Outputs**

**Global**: The number of pairs found, the ping-pong signal and the associated z-scores 
are computed at genome-wide level and returned in a pdf file.

**Lattice**: The number of pairs found, the ping-pong signals and the associated z-scores
are computed for each item described in the BAM alignment input and returned in a pdf file as a lattice graph.


        </help>
    <!-- Citation by DOI; the same identifier appears in the reference link of the help text. -->
    <citations>
            <citation type="doi">10.1007/978-1-4939-0931-5_12</citation>
    </citations>
</tool>