view volcanoplot.xml @ 3:6d532d760950 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/volcanoplot commit 16205b1cd782c5a4f7c08af7d4ee50dc5903418b
author iuc
date Tue, 12 Feb 2019 05:32:23 -0500
parents 7b7303fa20e3
children 73b8cb5bddcd
line wrap: on
line source

<tool id="volcanoplot" name="Volcano Plot" version="0.0.3">
    <description>create a volcano plot</description>
    <!-- Package dependencies, each pinned to an exact version.
         Per the help text, ggplot2 draws the plot and ggrepel places the point labels.
         dplyr and getopt are presumably used by volcanoplot.R for table handling and
         command-line option parsing; TODO confirm against the script. -->
    <requirements>
        <requirement type="package" version="3.1.0">r-ggplot2</requirement>
        <requirement type="package" version="0.8.0">r-ggrepel</requirement>
        <requirement type="package" version="0.7.8">r-dplyr</requirement>
        <requirement type="package" version="1.20.2">r-getopt</requirement>
    </requirements>
    <!-- Prints one line holding the R version followed by the versions of the
         ggplot2, ggrepel, dplyr and getopt packages.
         The R version is taken from the lines of R's version banner that mention
         "version" but not "GNU". Each package version is read from sessionInfo()
         in its own R invocation; stderr is discarded and any output line containing
         "WARNING: " (matched case-insensitively) is filtered out. -->
    <version_command><![CDATA[
echo $(R --version | grep version | grep -v GNU)", ggplot2 version" $(R --vanilla --slave -e "library(ggplot2); cat(sessionInfo()\$otherPkgs\$ggplot2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", ggrepel version" $(R --vanilla --slave -e "library(ggrepel); cat(sessionInfo()\$otherPkgs\$ggrepel\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", dplyr version" $(R --vanilla --slave -e "library(dplyr); cat(sessionInfo()\$otherPkgs\$dplyr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
    ]]></version_command>
    <command detect_errors="exit_code"><![CDATA[
## Builds the command line for volcanoplot.R (shipped in the tool directory).
## Required settings are always passed; optional ones are added only when the
## user supplied a value. A non-zero exit code marks the job as failed.

Rscript '${__tool_directory__}/volcanoplot.R'

    -i '$input'
    -a $fdr_col
    -p $pval_col
    -c $lfc_col
    -l $label_col
    -s $signif_thresh
    -x $lfc_thresh
    ## Labelling mode: labels from a file, significant points (optionally only
    ## the top N), or no labels at all (-t 0).
    #if $labels.label_select == 'file':
        -f '$labels.label_file'
    #else if $labels.label_select == 'signif':
        ## Optional numeric values are compared as strings: a plain truth test
        ## would treat an explicit 0 as "not set" and silently drop the option.
        #if str($labels.top_num) != '':
            -t $labels.top_num
        #end if
    #else
        -t 0
    #end if
    #if $plot_options.boxes:
        -b
    #end if
    #if $plot_options.title:
        -T '$plot_options.title'
    #end if
    #if $plot_options.xlab:
        -X '$plot_options.xlab'
    #end if
    #if $plot_options.ylab:
        -Y '$plot_options.ylab'
    #end if
    ## Axis limits: 0 is a legitimate limit, so test for an empty value rather
    ## than for truthiness.
    #if str($plot_options.xmin) != '':
        -m '$plot_options.xmin'
    #end if
    #if str($plot_options.xmax) != '':
        -M '$plot_options.xmax'
    #end if
    #if str($plot_options.ymax) != '':
        -W '$plot_options.ymax'
    #end if
    #if $plot_options.legend:
        -L '$plot_options.legend'
    #end if
    -z '$plot_options.legend_labs'

]]></command>
    <inputs>
        <!-- Results table and the columns of it that feed the plot. -->
        <param name="input" type="data" format="tabular" label="Specify an input file" />
        <param name="fdr_col" type="data_column" data_ref="input" label="FDR (adjusted P value)" />
        <param name="pval_col" type="data_column" data_ref="input" label="P value (raw)" />
        <param name="lfc_col" type="data_column" data_ref="input" label="Log Fold Change" />
        <param name="label_col" type="data_column" data_ref="input" label="Labels" />
        <!-- Thresholds deciding which points count as significant. The significance
             threshold is bounded to 0..1 so that out-of-range values are rejected in
             the form instead of reaching the script. -->
        <param name="signif_thresh" type="float" min="0" max="1" value="0.05" label="Significance threshold" help="Default: 0.05"/>
        <param name="lfc_thresh" type="float" value="0" label="LogFC threshold to colour" help="Default: 0"/>
        <!-- Which points receive a text label: none, the significant ones, or those listed in a file. -->
        <conditional name="labels">
            <param name="label_select" type="select" label="Points to label" help="Select to label significant points or input labels from file. Default: None">
                <option value="none" selected="True">None</option>
                <option value="signif">Significant</option>
                <option value="file">Input from file</option>
            </param>
            <when value="signif">
                <!-- A count of points, so negative numbers are not accepted. -->
                <param name="top_num" type="integer" min="0" optional="True" label="Only label top most significant" help="Specify the top number of points to label by P value significance. If no number is specified, all points that pass the FDR and Log Fold Change thresholds will be labelled."/>
            </when>
            <when value="file">
                <param name="label_file" type="data" format="tabular" label="File of labels"/>
            </when>
        </conditional>
        <!-- Cosmetic settings; everything except the legend labels and label boxes is optional. -->
        <section name="plot_options" expanded="false" title="Plot Options">
            <param name="boxes" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Label Boxes" help="If this is set to Yes, the labels for the points will be in boxes. Default: Yes"/>
            <param name="title" type="text" optional="True" label="Plot title"/>
            <param name="xlab" type="text" optional="True" label="Label for x axis"/>
            <param name="ylab" type="text" optional="True" label="Label for y axis"/>
            <param name="xmin" type="float" optional="True" label="Minimum value for x axis" help="To customise the x axis limits, specify both minimum and maximum values. Leave empty for automatic values."/>
            <param name="xmax" type="float" optional="True" label="Maximum value for x axis" help="To customise the x axis limits, specify both minimum and maximum values. Leave empty for automatic values."/>
            <param name="ymax" type="float" optional="True" label="Maximum value for y axis" help="To customise the y axis upper limit, specify the maximum value, the minimum will be 0. Leave empty for automatic value."/>
            <param name="legend" type="text" optional="True" label="Label for Legend Title"/>
            <param name="legend_labs" type="text" value="Down,Not Sig,Up" label="Labels for Legend" help="Labels in the legend can be specified. Default: Down,Not Sig,Up"/>
        </section>
    </inputs>
    <outputs>
        <!-- Single PDF result, collected from out.pdf in the job working directory
             (presumably written there by volcanoplot.R). -->
        <data name="plot"
              format="pdf"
              from_work_dir="out.pdf"
              label="Volcano plot on ${on_string}"/>
    </outputs>
    <tests>
        <test>
            <!-- Default settings: no point labels, default plot options -->
            <param name="input" value="input.tab" ftype="tabular"/>
            <param name="fdr_col" value="4"/>
            <param name="pval_col" value="3"/>
            <param name="lfc_col" value="2"/>
            <param name="label_col" value="1"/>
            <param name="lfc_thresh" value="0"/>
            <output name="plot" value="out.pdf" compare="sim_size"/>
        </test>
        <test>
            <!-- Points labelled from a user-supplied labels file -->
            <param name="input" value="input.tab" ftype="tabular"/>
            <param name="fdr_col" value="4"/>
            <param name="pval_col" value="3"/>
            <param name="lfc_col" value="2"/>
            <param name="label_col" value="1"/>
            <param name="lfc_thresh" value="0"/>
            <conditional name="labels">
                <param name="label_select" value="file"/>
                <param name="label_file" value="labels.tab" ftype="tabular"/>
            </conditional>
            <output name="plot" value="out2.pdf" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

This tool creates a Volcano plot using ggplot2. Points can be labelled via ggrepel.

In statistics, a `Volcano plot`_ is a type of scatter-plot that is used to quickly identify changes in large data sets composed of replicate data. It plots significance versus fold-change on the y and x axes, respectively. These plots are increasingly common in omic experiments such as genomics, proteomics, and metabolomics where one often has a list of many thousands of replicate data points between two conditions and one wishes to quickly identify the most meaningful changes. A volcano plot combines a measure of statistical significance from a statistical test (e.g., a p value from an ANOVA model) with the magnitude of the change, enabling quick visual identification of those data-points (genes, etc.) that display large magnitude changes that are also statistically significant.

A volcano plot is constructed by plotting the negative log of the p value on the y axis (usually base 10). This results in data points with low p values (highly significant) appearing toward the top of the plot. The x axis is the log of the fold change between the two conditions. The log of the fold change is used so that changes in both directions appear equidistant from the center. Plotting points in this way results in two regions of interest in the plot: those points that are found toward the top of the plot that are far to either the left- or right-hand sides. These represent values that display large magnitude fold changes (hence being left or right of center) as well as high statistical significance (hence being toward the top).

Source: Wikipedia

-----

**Inputs**

A tabular file with a header row containing the columns below (additional columns may be present):

    * P value
    * FDR / adjusted P value
    * Log fold change
    * Labels (e.g. Gene symbols or IDs)

All significant points (those meeting the specified FDR and Log Fold Change thresholds) will be coloured: red for upregulated, blue for downregulated. Users can choose to apply labels to the points (such as gene symbols) from the Labels column. To label all significant points, select "Significant" for the **Points to label** option; to label only the most significant ones, also specify a number under "Only label top most significant". To label specific points of interest, select "Input from file" for **Points to label** and provide a tabular labels file. The labels file must contain a header row and have the labels in the first column. These labels must match the labels in the main input file.

**Outputs**

A PDF containing a Volcano plot like below.

.. image:: $PATH_TO_IMAGES/volcano_plot.png

.. _Volcano plot: https://en.wikipedia.org/wiki/Volcano_plot_(statistics)

    ]]></help>
    <!-- Reference for the plotting library this tool is built on (see help text). -->
    <citations>
        <citation type="bibtex">
@Book{wickham2016ggplot2,
    author = {Hadley Wickham},
    title = {ggplot2: Elegant Graphics for Data Analysis},
    publisher = {Springer-Verlag New York},
    year = {2016},
    isbn = {978-3-319-24277-4},
    url = {https://ggplot2.tidyverse.org},
}
        </citation>
    </citations>
</tool>