comparison volcanoplot.xml @ 0:59ebf2c42c0e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/volcanoplot commit 7cc1a05f2868f270bf5bdbbd4820ef6f69c9fc8d
author iuc
date Tue, 23 Oct 2018 06:57:26 -0400
parents
children 7b7303fa20e3
comparison
equal deleted inserted replaced
-1:000000000000 0:59ebf2c42c0e
1 <tool id="volcanoplot" name="Volcano Plot" version="0.0.1">
2 <description>create a volcano plot</description>
3 <requirements> <!-- Pinned packages the tool depends on; the r- prefixed names are the R libraries loaded in version_command (presumably also by volcanoplot.R; script not visible here) -->
4 <requirement type="package" version="3.0.0">r-ggplot2</requirement>
5 <requirement type="package" version="0.8.0">r-ggrepel</requirement>
6 <requirement type="package" version="0.7.6">r-dplyr</requirement>
7 <requirement type="package" version="1.20.2">r-getopt</requirement>
8 </requirements>
9 <version_command><![CDATA[
10 echo $(R --version | grep version | grep -v GNU)", ggplot2 version" $(R --vanilla --slave -e "library(ggplot2); cat(sessionInfo()\$otherPkgs\$ggplot2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", ggrepel version" $(R --vanilla --slave -e "library(ggrepel); cat(sessionInfo()\$otherPkgs\$ggrepel\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", dplyr version" $(R --vanilla --slave -e "library(dplyr); cat(sessionInfo()\$otherPkgs\$dplyr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
11 ]]></version_command> <!-- Echoes one line: the R version followed by the installed ggplot2, ggrepel, dplyr and getopt versions; stderr and "WARNING: " lines are filtered out -->
12 <command detect_errors="exit_code"><![CDATA[
13 ## Builds the volcanoplot.R command line: input file, column indices and thresholds are always passed; label and plot options only when set.
14 Rscript '${__tool_directory__}/volcanoplot.R'
15 ## NOTE: top_num is tested via its string form below so that an explicit 0 (documented in the help as "no labels") is still forwarded as -t 0; a plain truthiness test drops it.
16 -i '$input'
17 -a $fdr_col
18 -p $pval_col
19 -c $lfc_col
20 -l $label_col
21 -s $signif_thresh
22 -x $lfc_thresh
23 #if $labels.label_select == 'file':
24 -f '$labels.label_file'
25 #else if str($labels.top_num).strip() != '':
26 -t $labels.top_num
27 #end if
28 #if $plot_options.title:
29 -T '$plot_options.title'
30 #end if
31 #if $plot_options.xlab:
32 -X '$plot_options.xlab'
33 #end if
34 #if $plot_options.ylab:
35 -Y '$plot_options.ylab'
36 #end if
37 #if $plot_options.legend:
38 -L '$plot_options.legend'
39 #end if
40 -z '$plot_options.legend_labs'
41
42 ]]></command>
43 <inputs> <!-- User-facing parameters; column selectors are resolved against the chosen input dataset via data_ref -->
44 <param name="input" type="data" format="tabular" label="Specify an input file" />
45 <param name="fdr_col" type="data_column" data_ref="input" label="FDR (adjusted P value)" />
46 <param name="pval_col" type="data_column" data_ref="input" label="P value (raw)" />
47 <param name="lfc_col" type="data_column" data_ref="input" label="Log Fold Change" />
48 <param name="label_col" type="data_column" data_ref="input" label="Labels" />
49 <param name="signif_thresh" type="float" min="0" max="1" value="0.05" label="Significance threshold" help="Default: 0.05"/>
50 <param name="lfc_thresh" type="float" value="0" label="LogFC threshold to colour" help="Default: 0"/>
51 <conditional name="labels"> <!-- Chooses which points get text labels: top significant points, or labels listed in a file -->
52 <param name="label_select" type="select" label="Points to label" help="Select to label top significant points or input labels from file. All points meeting the significance threshold are labelled by default.">
53 <option value="signif" selected="True">Significant</option>
54 <option value="file">Input from file</option>
55 </param>
56 <when value="signif">
57 <param name="top_num" type="integer" min="0" optional="True" label="Only label top most significant" help="Specify the top number of points to label by P value significance"/>
58 </when>
59 <when value="file">
60 <param name="label_file" type="data" format="tabular" label="File of labels"/>
61 </when>
62 </conditional>
63 <section name="plot_options" expanded="false" title="Plot Options"> <!-- Optional cosmetic settings; only legend_labs has a default value -->
64 <param name="title" type="text" optional="True" label="Plot title"/>
65 <param name="xlab" type="text" optional="True" label="Label for x axis"/>
66 <param name="ylab" type="text" optional="True" label="Label for y axis"/>
67 <param name="legend" type="text" optional="True" label="Label for Legend Title"/>
68 <param name="legend_labs" type="text" value="Down,Not Sig,Up" label="Labels for Legend" help="Labels in the legend can be specified. Default: Down,Not Sig,Up"/>
69 </section>
70 </inputs>
71 <outputs> <!-- Single PDF result, collected from out.pdf in the job working directory (presumably written by volcanoplot.R; script not visible here) -->
72 <data name="plot" format="pdf" from_work_dir="out.pdf" label="Volcano plot on ${on_string}"/>
73 </outputs>
74 <tests>
75 <test>
76 <!-- Ensure default output works: only the input, the four column selectors and the logFC threshold are set -->
77 <param name="input" ftype="tabular" value="input.tab"/>
78 <param name="fdr_col" value="4" />
79 <param name="pval_col" value="3" />
80 <param name="lfc_col" value="2" />
81 <param name="label_col" value="1" />
82 <param name="lfc_thresh" value="0" />
83 <output name="plot" value= "out.pdf" compare="sim_size" />
84 </test>
85 <test>
86 <!-- Ensure labels supplied from a file work (no plot options are set in this test) -->
87 <param name="input" ftype="tabular" value="input.tab"/>
88 <param name="fdr_col" value="4" />
89 <param name="pval_col" value="3" />
90 <param name="lfc_col" value="2" />
91 <param name="label_col" value="1" />
92 <param name="lfc_thresh" value="0" />
93 <param name="label_select" value="file"/>
94 <param name="label_file" ftype="tabular" value="labels.tab" />
95 <output name="plot" value= "out2.pdf" compare="sim_size" />
96 </test>
97 </tests>
98 <help><![CDATA[
99 .. class:: infomark
100
101 **What it does**
102
103 This tool creates a Volcano plot using ggplot2. Points can be labelled via ggrepel.
104
105 In statistics, a `Volcano plot`_ is a type of scatter-plot that is used to quickly identify changes in large data sets composed of replicate data. It plots significance versus fold-change on the y and x axes, respectively. These plots are increasingly common in omic experiments such as genomics, proteomics, and metabolomics where one often has a list of many thousands of replicate data points between two conditions and one wishes to quickly identify the most meaningful changes. A volcano plot combines a measure of statistical significance from a statistical test (e.g., a p value from an ANOVA model) with the magnitude of the change, enabling quick visual identification of those data-points (genes, etc.) that display large magnitude changes that are also statistically significant.
106
107 A volcano plot is constructed by plotting the negative log of the p value on the y axis (usually base 10). This results in data points with low p values (highly significant) appearing toward the top of the plot. The x axis is the log of the fold change between the two conditions. The log of the fold change is used so that changes in both directions appear equidistant from the center. Plotting points in this way results in two regions of interest in the plot: those points that are found toward the top of the plot that are far to either the left- or right-hand sides. These represent values that display large magnitude fold changes (hence being left or right of center) as well as high statistical significance (hence being toward the top).
108
109 Source: Wikipedia
110
111 -----
112
113 **Inputs**
114
115 A tabular file with a header row containing the columns below (additional columns may be present):
116
117 * P value
118 * FDR / adjusted P value
119 * Log fold change
120 * Labels (e.g. Gene symbols or IDs)
121
122 All points meeting the specified significance threshold will be labelled by default with the values in the Labels column. Users can select to only label the top significant points under **Points to label**, or to label only certain points by providing a tabular labels file. The labels file must contain a header row and have the labels in the first column. These labels must match the labels in the main input file. If no labels are desired specify 0 for "Only label top most significant". If a log fold change (lfc) threshold is specified, points that meet the significance threshold and lfc threshold will be coloured red if upregulated and blue if downregulated.
123
124 **Outputs**
125
126 A PDF containing a Volcano plot like below.
127
128 .. image:: $PATH_TO_IMAGES/volcano_plot.png
129
130 .. _Volcano plot: https://en.wikipedia.org/wiki/Volcano_plot_(statistics)
131
132 ]]></help>
133 <citations>
134 </citations>
135 </tool>