comparison limma_voom.xml @ 0:bdebdea5f6a7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 2f34a215c35f08c3666f314a87d235437baa1d21
author iuc
date Mon, 12 Jun 2017 07:41:02 -0400
parents
children 76d01fe0ec36
comparison
equal deleted inserted replaced
-1:000000000000 0:bdebdea5f6a7
1 <tool id="limma_voom" name="limma-voom" version="1.1.1"> <!-- Galaxy tool wrapper that drives limma_voom.R from the tool directory -->
2 <description>
3 Differential expression with optional sample weights
4 </description>
5 <!-- Packages the tool depends on, each pinned to an exact version. -->
6 <requirements>
7 <requirement type="package" version="3.16.5">bioconductor-edger</requirement>
8 <requirement type="package" version="3.30.13">bioconductor-limma</requirement>
9 <requirement type="package" version="1.4.29">r-statmod</requirement>
10 <requirement type="package" version="0.4.1">r-scales</requirement>
11 </requirements>
12 <!-- Echoes the R version line followed by the loaded limma and edgeR package versions; R stderr is discarded and "WARNING: " lines are filtered out. -->
13 <version_command>
14 <![CDATA[
15 echo $(R --version | grep version | grep -v GNU)", limma version" $(R --vanilla --slave -e "library(limma); cat(sessionInfo()\$otherPkgs\$limma\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", edgeR version" $(R --vanilla --slave -e "library(edgeR); cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
16 ]]>
17 </version_command>
18 <!-- Runs limma_voom.R with positional arguments, then moves the .tsv files it wrote under the report's files_path into output_dir. -->
19 <command detect_errors="exit_code">
20 <![CDATA[
21 Rscript '$__tool_directory__/limma_voom.R'
22 '$counts'
23 ## Annotation file, or the literal None when annotations are switched off
24 #if $anno.annoOpt=='yes':
25 '$anno.geneanno'
26 #else:
27 None
28 #end if
29 ## Report file, report extra-files directory, option flags, contrasts
30 '$outReport'
31 '$outReport.files_path'
32 $rdaOption
33 $normalisationOption
34 $weightOption
35 '$contrast'
36 ## CPM filter thresholds; 0 and 0 are passed when filtering is off
37 #if $filterCPM.filterLowCPM=='yes':
38 '$filterCPM.cpmReq'
39 '$filterCPM.sampleReq'
40 #else:
41 0
42 0
43 #end if
44 ## Testing options; BH, 0.05 and 0 are passed when advanced options are off
45 #if $testOpt.wantOpt=='yes':
46 '$testOpt.pAdjust'
47 '$testOpt.pVal'
48 '$testOpt.lfc'
49 #else:
50 "BH"
51 0.05
52 0
53 #end if
54 ## Factor name and its comma-separated values joined by a double colon
55 '$factName::$factLevel'
56 ## Stage the .tsv tables in output_dir, where dataset discovery looks for them
57 &&
58 mkdir ./output_dir
59
60 &&
61 mv '$outReport.files_path'/*.tsv output_dir/
62
63 ]]>
64 </command>
65 <!-- User-facing parameters; their values are substituted into the command template. -->
66 <inputs>
67 <param name="counts" type="data" format="tabular" label="Counts Data"/>
68 <!-- Optional annotation table, requested only when annoOpt is "yes". -->
69 <conditional name="anno">
70 <param name="annoOpt" type="select"
71 label="Use Gene Annotations?"
72 help="If an annotation file is provided, annotations will be added to the table of differential expression results to provide descriptions for each gene.">
73 <option value="no">No</option>
74 <option value="yes">Yes</option>
75 </param>
76 <when value="yes">
77 <param name="geneanno" type="data" format="tabular" label="Gene Annotations"/>
78 </when>
79 <when value="no" />
80 </conditional>
81
82 <!--*Code commented until solution for multiple factors is found*
83 <repeat name="factors" title="Factors" min="1" max="5" default="1">
84 <param name="factName" type="text" label="Factor Name (No spaces)"
85 help="Eg. Genotype"/>
86 <param name="factLevel" type="text" size="100"
87 label="Factor Levels (No spaces)"
88 help="Eg. WT,WT,Mut,Mut,WT"/>
89 </repeat>
90 -->
91 <!-- Single-factor inputs currently in use: free-text name, comma-separated values, comma-separated contrasts. -->
92 <param name="factName" type="text" label="Factor Name" help="Eg. Genotype."/>
93 <param name="factLevel" type="text" label="Factor Values"
94 help="Eg. WT,WT,WT,Mut,Mut,Mut
95 NOTE: Please ensure that the same levels are typed identically with cases matching."/>
96 <param name="contrast" type="text" label="Contrasts of interest" help="Eg. Mut-WT,KD-Control"/>
97 <!-- Low-CPM filter thresholds, offered only when filtering is enabled (the default selection). -->
98 <conditional name="filterCPM">
99 <param name="filterLowCPM" type="select" label="Filter Low CPM?"
100 help="Treat genes with very low expression as unexpressed and filter out to speed up computation.">
101 <option value="yes" selected="True">Yes</option>
102 <option value="no">No</option>
103 </param>
104 <when value="yes">
105 <param name="cpmReq" type="float" value="0.5" min="0" label="Minimum CPM"/>
106
107 <param name="sampleReq" type="integer" value="1" min="0" label="Minimum Samples"
108 help="Filter out all the genes that do not meet the minimum CPM in at least this many samples."/>
109 </when>
110 <when value="no"/>
111 </conditional>
112 <!-- Boolean flags below render as the strings "yes" or "no". -->
113 <param name="weightOption" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Apply sample weights?"
114 help="Apply weights if outliers are present.">
115 </param>
116
117 <param name="normalisationOption" type="select" label="Normalisation Method">
118 <option value="TMM">TMM</option>
119 <option value="RLE">RLE</option>
120 <option value="upperquartile">Upperquartile</option>
121 <option value="none">None (Don't normalise)</option>
122 </param>
123
124 <param name="rdaOption" type="boolean" truevalue="yes" falsevalue="no" checked="false"
125 label="Output RData?"
126 help="Output all the data used by R to construct the plots and tables, can be loaded into R. A link to the RData file will be provided in the HTML report.">
127 </param>
128 <!-- Advanced testing options: adjustment method, adjusted threshold and minimum log2 fold change. -->
129 <conditional name="testOpt">
130 <param name="wantOpt" type="select" label="Use Advanced Testing Options?"
131 help="Enable choices for p-value adjustment method, p-value threshold and log2-fold-change threshold.">
132 <option value="no" selected="True">No</option>
133 <option value="yes">Yes</option>
134 </param>
135 <when value="yes">
136 <param name="pAdjust" type="select" label="P-Value Adjustment Method.">
137 <option value="BH">Benjamini and Hochberg (1995)</option>
138 <option value="BY">Benjamini and Yekutieli (2001)</option>
139 <option value="holm">Holm (1979)</option>
140 <option value="none">None</option>
141 </param>
142 <param name="pVal" type="float" value="0.05" min="0" max="1"
143 label="Adjusted Threshold"
144 help="Genes below this threshold are considered significant and highlighted in the MA plot. If either BH(1995) or BY(2001) were selected then this value is a false-discovery-rate control. If Holm(1979) was selected then this is an adjusted p-value for family-wise error rate."/>
145 <param name="lfc" type="float" value="0" min="0"
146 label="Minimum log2-fold-change Required"
147 help="Genes above this threshold and below the p-value threshold are considered significant and highlighted in the MA plot."/>
148 </when>
149 <when value="no"/>
150 </conditional>
151
152 </inputs>
153 <!-- HTML report plus a list collection built from every .tsv file in output_dir; each element is named after its file without the .tsv suffix. -->
154 <outputs>
155 <data format="html" name="outReport" label="${tool.name} on ${on_string}: Report" />
156 <collection name="voom_results" type="list" label="${tool.name} on ${on_string}: DE genes">
157 <discover_datasets pattern="(?P&lt;name&gt;.+)\.tsv$" format="tabular" directory="output_dir" visible="false" />
158 </collection>
159 </outputs>
160 <!-- Functional tests: a default run with two contrasts, a run with gene annotations, and a run with RData output enabled. -->
161 <tests>
162 <test>
163 <param name="counts" value="matrix.txt" />
164 <param name="factName" value="Genotype" />
165 <param name="factLevel" value="WT,WT,WT,Mut,Mut,Mut" />
166 <param name="contrast" value="Mut-WT,WT-Mut" />
167 <param name="normalisationOption" value="TMM" />
168 <output_collection name="voom_results" count="2">
169 <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT.tsv" />
170 <element name="limma-voom_WT-Mut" ftype="tabular" file="limma-voom_WT-Mut.tsv" />
171 </output_collection>
172 <output name="outReport" >
173 <assert_contents>
174 <has_text text="Limma-voom Analysis Output" />
175 <not_has_text text="RData" />
176 </assert_contents>
177 </output>
178 </test>
179 <test>
180 <param name="annoOpt" value="yes" />
181 <param name="geneanno" value="anno.txt" />
182 <param name="counts" value="matrix.txt" />
183 <param name="factName" value="Genotype" />
184 <param name="factLevel" value="WT,WT,WT,Mut,Mut,Mut" />
185 <param name="contrast" value="Mut-WT" />
186 <param name="normalisationOption" value="TMM" />
187 <output_collection name="voom_results" >
188 <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WTanno.tsv" />
189 </output_collection>
190 </test>
191 <test>
192 <param name="rdaOption" value="yes" />
193 <param name="counts" value="matrix.txt" />
194 <param name="factName" value="Genotype" />
195 <param name="factLevel" value="WT,WT,WT,Mut,Mut,Mut" />
196 <param name="contrast" value="Mut-WT" />
197 <param name="normalisationOption" value="TMM" />
198 <output name="outReport" >
199 <assert_contents>
200 <has_text text="RData" />
201 </assert_contents>
202 </output>
203 </test>
204 </tests>
205
206 <help>
207 <![CDATA[
208 .. class:: infomark
209
210 **What it does**
211
212 Given a matrix of counts (e.g. from featureCounts) and optional information about the genes, this tool
213 produces plots and tables useful in the analysis of differential gene
214 expression.
215
216 -----
217
218 **Inputs**
219
220 **Counts Data:**
221 A matrix of counts, with rows corresponding to genes
222 and columns corresponding to counts for the samples.
223 Values must be tab separated, with the first row containing the sample/column
224 labels and the first column containing the row/gene labels.
225
226 Example:
227
228 ========== ======= ======= ======= ======== ======== ========
229 **GeneID** **WT1** **WT2** **WT3** **Mut1** **Mut2** **Mut3**
230 ---------- ------- ------- ------- -------- -------- --------
231 11287 1699 1528 1601 1463 1441 1495
232 11298 1905 1744 1834 1345 1291 1346
233 11302 6 8 7 5 6 5
234 11303 2099 1974 2100 1574 1519 1654
235 11304 356 312 337 361 397 346
236 11305 2528 2438 2493 1762 1942 2027
237 ========== ======= ======= ======= ======== ======== ========
238
239 **Gene Annotations:**
240 Optional input for gene annotations, this can contain more
241 information about the genes than just an ID number. The annotations will
242 be available in the differential expression results table.
243
244 Example:
245
246 ========== ========== ===================================================
247 **GeneID** **Symbol** **GeneName**
248 ---------- ---------- ---------------------------------------------------
249 1287 Pzp pregnancy zone protein
250 1298 Aanat arylalkylamine N-acetyltransferase
251 1302 Aatk apoptosis-associated tyrosine kinase
252 1303 Abca1 ATP-binding cassette, sub-family A (ABC1), member 1
253 1304 Abca4 ATP-binding cassette, sub-family A (ABC1), member 4
254 1305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2
255 ========== ========== ===================================================
256
257 **Factor Name:**
258 The name of the factor being investigated. This tool currently assumes
259 that only one factor is of interest.
260
261 **Factor Values:**
262 The levels of the factor of interest, this must be entered in the same
263 order as the samples to which the levels correspond as listed in the
264 columns of the counts matrix.
265
266 The values should be separated by commas, and spaces must not be used.
267
268 **Contrasts of Interest:**
269 The contrasts you wish to make between levels.
270
271 A common contrast would be a simple difference between two levels: "Mut-WT"
272 represents the difference between the mutant and wild type genotypes.
273
274 The values should be separated by commas, and spaces must not be used.
275
276 **Filter Low CPM:**
277 Option to ignore the genes that do not show significant levels of
278 expression, this filtering is dependent on two criteria:
279
280 * **Minimum CPM:** This is the counts per million that a gene must have in at
281 least some specified number of samples.
282
283 * **Minimum Samples:** This is the number of samples in which the CPM
284 requirement must be met in order for that gene to be acknowledged.
285
286 Only genes that exhibit a CPM greater than the required amount in at least the
287 number of samples specified will be used for analysis. Care should be taken to
288 ensure that the sample requirement is appropriate. In the case of an experiment
289 with two experimental groups each with two members, if there is a change from
290 insignificant cpm to significant cpm but the sample requirement is set to 3,
291 then this will cause that gene to fail the criteria. When in doubt simply do not
292 filter.
293
294
295 **Normalisation Method:**
296 Option for using different methods to rescale the raw library
297 size. For more information, see the calcNormFactors section in the edgeR_ user's
298 manual.
299
300 **Apply Sample Weights:**
301 Option to downweight outlier samples such that their information is still
302 used in the statistical analysis but their impact is reduced. Use this
303 whenever significant outliers are present. The MDS plotting tool in this package
304 is useful for identifying outliers. For more information on this option see Liu et al. (2015).
305
306 **Use Advanced Testing Options?:**
307 By default error rate for multiple testing is controlled using Benjamini and
308 Hochberg's false discovery rate control at a threshold value of 0.05. However
309 there are options to change this to custom values.
310
311 * **P-Value Adjustment Method:**
312 Change the multiple testing control method, the options are BH(1995) and
313 BY(2001) which are both false discovery rate controls. There is also
314 Holm(1979) which is a method for family-wise error rate control.
315
316 * **Adjusted Threshold:**
317 Set the threshold for the resulting value of the multiple testing control
318 method. Only observations whose statistic falls below this value are
319 considered significant and thus highlighted in the MA plot.
320
321 * **Minimum log2-fold-change Required:**
322 In addition to meeting the requirement for the adjusted statistic for
323 multiple testing, the observation must have an absolute log2-fold-change
324 greater than this threshold to be considered significant, thus highlighted
325 in the MA plot.
326
327 -----
328
329 **Citations:**
330
331 .. class:: infomark
332
333 limma
334
335 Please cite the paper below for the limma software itself. Please also try
336 to cite the appropriate methodology articles that describe the statistical
337 methods implemented in limma, depending on which limma functions you are
338 using. The methodology articles are listed in Section 2.1 of the limma
339 User's Guide.
340
341 * Smyth GK (2005). Limma: linear models for microarray data. In:
342 'Bioinformatics and Computational Biology Solutions using R and
343 Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry,
344 W. Huber (eds), Springer, New York, pages 397-420.
345
346 * Law CW, Chen Y, Shi W, and Smyth GK (2014). Voom:
347 precision weights unlock linear model analysis tools for
348 RNA-seq read counts. Genome Biology 15, R29.
349
350 * Liu R, Holik AZ, Su S, Jansz N, Chen K, Leong HS, Blewitt ME, Asselin-Labat ML, Smyth GK, Ritchie ME (2015). Why weight? Modelling sample and observational level variability improves power in RNA-seq analyses. Nucleic Acids Research, 43(15), e97.
351
352 * Ritchie, M. E., Diyagama, D., Neilson, J., van Laar, R., Dobrovic,
353 A., Holloway, A., and Smyth, G. K. (2006). Empirical array quality weights
354 for microarray data. BMC Bioinformatics 7, Article 261.
355
356 .. class:: infomark
357
358 edgeR
359
360 Please cite the first paper for the software itself and the other papers for
361 the various original statistical methods implemented in edgeR. See
362 Section 1.2 in the User's Guide for more detail.
363
364 * Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a Bioconductor
365 package for differential expression analysis of digital gene expression
366 data. Bioinformatics 26, 139-140
367
368 * Robinson MD and Smyth GK (2007). Moderated statistical tests for assessing
369 differences in tag abundance. Bioinformatics 23, 2881-2887
370
371 * Robinson MD and Smyth GK (2008). Small-sample estimation of negative
372 binomial dispersion, with applications to SAGE data.
373 Biostatistics, 9, 321-332
374
375 * McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis
376 of multifactor RNA-Seq experiments with respect to biological variation.
377 Nucleic Acids Research 40, 4288-4297
378
379 Please report problems or suggestions to: su.s@wehi.edu.au
380
381 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
382 .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html
383 ]]>
384 </help>
385 <citations> <!-- NOTE(review): DOI presumably identifies Liu et al. (2015), the Nucleic Acids Research paper cited in the help text; confirm -->
386 <citation type="doi">10.1093/nar/gkv412</citation>
387 </citations>
388 </tool>