view plotPCA.xml @ 15:38e3f226248b draft

planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit e071ca1cc84da473ecc209c9d5e633d90cbc6a78
author bgruening
date Mon, 15 May 2017 03:58:11 -0400
parents dfe535b75616
children f2f1ca0f032f
line wrap: on
line source

<tool id="deeptools_plot_pca" name="plotPCA" version="@WRAPPER_VERSION@.0">
    <description>Generate principal component analysis (PCA) plots from multiBamSummary or multiBigwigSummary output</description>
    <!-- Local token naming the executable, plus the import of the shared deepTools macro file. -->
    <macros>
        <token name="@BINARY@">plotPCA</token>
        <import>deepTools_macros.xml</import>
    </macros>
    <!-- Expands the "requirements" macro; presumably defined in deepTools_macros.xml (confirm there). -->
    <expand macro="requirements"/>
    <!-- Cheetah template that assembles the plotPCA command line.
         Interpolated values are wrapped in single quotes so the shell passes them
         through verbatim (no variable, backtick or backslash interpretation). -->
    <command>
<![CDATA[
        @BINARY@
            --corData '$corData'
            --plotTitle '$plotTitle'
            --plotFile '$outFileName'
            --plotFileFormat '$outFileFormat'
            ## Plot dimensions are only passed when the advanced section is switched on.
            #if str($advancedOpt.showAdvancedOpt) == 'yes':
                @PLOTWIDTHHEIGHT@
            #end if
            ## Renders as the flag itself when checked and as an empty string otherwise.
            $rowCenter
            ## The tabular data file is optional and only requested when the checkbox is set.
            #if $outFileNameData:
                --outFileNameData '$output_outFileNameData'
            #end if
]]>
    </command>
    <!-- User-facing parameters. The image format and plot title widgets come from shared macros. -->
    <inputs>
        <!-- Matrix produced by multiBamSummary or multiBigwigSummary. -->
        <param name="corData"
               type="data"
               format="deeptools_coverage_matrix"
               label="Matrix file from the multiBamSummary or multiBigwigSummary tools"/>
        <expand macro="input_image_file_format"/>
        <expand macro="plotTitle"/>
        <!-- Checkbox controlling whether the optional tabular output is produced. -->
        <param argument="--outFileNameData"
               type="boolean"
               label="Save the matrix of PCA and eigenvalues underlying the plot."/>
        <!-- Checked: the flag text is inserted into the command line; unchecked: empty string. -->
        <param argument="--rowCenter"
               type="boolean"
               truevalue="--rowCenter"
               falsevalue=""
               label="Center Rows?"
               help="When specified, each row (bin, gene, etc.) in the matrix is centered at 0 before the PCA is computed. This is useful only if you have a strong bin/gene/etc. correlation and the resulting principal component has samples stacked vertically."/>
        <!-- Advanced section: plot width and height are only offered when "yes" is selected. -->
        <conditional name="advancedOpt">
            <param name="showAdvancedOpt"
                   type="select"
                   label="Show advanced options">
                <option value="no" selected="true">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <expand macro="plotWidthHeight"
                        PLOTWIDTH="5.0"
                        PLOTHEIGHT="10.0"/>
            </when>
        </conditional>
    </inputs>
    <!-- Datasets created by the tool: the plot image (from a shared macro) and an optional table. -->
    <outputs>
        <expand macro="output_image_file_format_not_nested"/>
        <!-- Only created when the outFileNameData checkbox is set (see the filter expression). -->
        <data name="output_outFileNameData"
              format="tabular"
              label="${tool.name} on ${on_string}: PCA matrix">
            <filter>outFileNameData</filter>
        </data>
    </outputs>
    <!-- Functional tests; PNG outputs are compared by file size within a tolerance of 4000. -->
    <tests>
        <!-- Case 1: plot only, default options. -->
        <test>
            <param name="corData"
                   value="multiBamSummary_result2b.npz"
                   ftype="deeptools_coverage_matrix"/>
            <param name="plotTitle" value="Test Plot"/>
            <param name="outFileFormat" value="png"/>
            <output name="outFileName"
                    file="plotPCA_result1.png"
                    ftype="png"
                    compare="sim_size"
                    delta="4000"/>
        </test>
        <!-- Case 2: plot plus the optional tabular data output. -->
        <test>
            <param name="corData"
                   value="multiBamSummary_result2b.npz"
                   ftype="deeptools_coverage_matrix"/>
            <param name="plotTitle" value="Test Plot"/>
            <param name="outFileFormat" value="png"/>
            <param name="outFileNameData" value="True"/>
            <output name="outFileName"
                    file="plotPCA_result2.png"
                    ftype="png"
                    compare="sim_size"
                    delta="4000"/>
            <output name="output_outFileNameData"
                    file="plotPCA_result2.tabular"
                    ftype="tabular"/>
        </test>
    </tests>
    <help>
<![CDATA[

What it does
------------

This tool takes the **default output file** of ``multiBamSummary`` or ``multiBigwigSummary`` to perform a principal component analysis (PCA).

Output
------

The result is a panel of two plots:

1. The eigenvalues of the **top two principal components**.
2. The **Scree plot** for the top five principal components, where the bars represent the amount of variability explained by the individual factors and the red line traces the cumulative amount of variability explained by the components.

Example plot
------------

.. image:: $PATH_TO_IMAGES/plotPCA_annotated.png
   :width: 600
   :height: 315

-----

Theoretical Background
----------------------

Principal component analysis (PCA) can be used, for example, to determine whether **samples display greater variability** between experimental conditions than between replicates of the same treatment. PCA is also useful to identify unexpected patterns, such as those caused by batch effects or outliers.
Principal components represent the directions along which the variation in the data is maximal, so that the information (e.g., read coverage values) from thousands of regions can be represented by just a few dimensions.

PCA is not necessarily meant to identify unknown groupings or clustering; it is up to the researcher to determine the experimental or technical reason underlying the principal components.


-----

@REFERENCES@
]]>
    </help>
    <!-- Expands the "citations" macro; presumably defined in the imported deepTools_macros.xml (confirm there). -->
    <expand macro="citations" />
</tool>