Mercurial > repos > shians > voom_rnaseq
comparison diffexp.xml @ 0:7a80e9ec63cb
- Initial commit
author | shian_su <registertonysu@gmail.com> |
---|---|
date | Tue, 16 Dec 2014 14:38:15 +1100 |
parents | |
children | b2fe55fd0651 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7a80e9ec63cb |
---|---|
1 <tool id="diffexp" name="Voom Rnaseq" version="1.1.0"> | |
2 <description> | |
3 Perform differential expression analysis using pipeline based on the voom | |
4 function of the limma bioconductor package. This tool takes a count matrix | |
5 (tab separated) as input and produces a HTML report as output. | |
6 </description> | |
7 | |
8 <requirements> | |
9 <requirement type="R-module" version="3.5.27">edgeR</requirement> | |
10 <requirement type="R-module" version="3.18.13">limma</requirement> | |
11 </requirements> | |
12 | |
13 <stdio> | |
14 <exit_code range="1:" level="fatal" description="Tool exception" /> | |
15 </stdio> | |
16 | |
17 <command interpreter="Rscript"> | |
18 diffexp.R $counts | |
19 | |
20 #if $anno.annoOpt=="yes": | |
21 $geneanno | |
22 #else: | |
23 None | |
24 #end if | |
25 | |
26 $outFile | |
27 $outFile.files_path | |
28 "no" <!-- Disabled Rda option --> | |
29 $normalisationOption | |
30 $weightCond.weightOption | |
31 "$contrast" | |
32 | |
33 #if $filterCPM.filterLowCPM=="yes": | |
34 $filterCPM.cpmReq | |
35 $filterCPM.sampleReq | |
36 #else: | |
37 0 | |
38 0 | |
39 #end if | |
40 | |
41 #if $testOpt.wantOpt=="yes": | |
42 "$testOpt.pAdjust" | |
43 $testOpt.pVal | |
44 $testOpt.lfc | |
45 #else: | |
46 "BH" | |
47 0.05 | |
48 0 | |
49 #end if | |
50 | |
51 <!--*Code commented until solution for multiple factors is found* | |
52 #for $i, $fct in enumerate($factors): | |
53 $fct.factName::$fct.factLevel | |
54 #end for | |
55 --> | |
56 "$factName::$factLevel" | |
57 | |
58 </command> | |
59 | |
60 <inputs> | |
61 <param name="counts" type="data" format="tabular" label="Counts Data"/> | |
62 | |
63 <conditional name="anno"> | |
64 <param name="annoOpt" type="select" label="Use Gene Annotations?" | |
65 help="Annotations will be added to table of top differential | |
66 expressions to provide descriptions for each gene."> | |
67 <option value="no">No</option> | |
68 <option value="yes">Yes</option> | |
69 </param> | |
70 | |
71 <when value="yes"> | |
72 <param name="geneanno" type="data" format="tabular" | |
73 label="Gene Annotations"/> | |
74 </when> | |
75 </conditional> | |
76 | |
77 <!--*Code commented until solution for multiple factors is found* | |
78 <repeat name="factors" title="Factors" min="1" max="5" default="1"> | |
79 <param name="factName" type="text" label="Factor Name (No spaces)" | |
80 help="Eg. Genotype"/> | |
81 <param name="factLevel" type="text" size="100" | |
82 label="Factor Levels (No spaces)" | |
83 help="Eg. WT,WT,Mut,Mut,WT"/> | |
84 </repeat> | |
85 --> | |
86 | |
87 <param name="factName" type="text" label="Factor Name" | |
88 help="Eg. Genotype."/> | |
89 <param name="factLevel" type="text" size="100" | |
90 label="Factor Values" | |
91 help="Eg. WT,WT,Mut,Mut,WT... NOTE: Please ensure that the same | |
92 levels are typed identically when repeated, with all cases | |
93 matching."/> | |
94 | |
95 <param name="contrast" type="text" size="30" | |
96 label="Contrasts of interest" | |
97 help="Eg. Mut-WT,KD-Control."/> | |
98 | |
99 <conditional name="filterCPM"> | |
100 <param name="filterLowCPM" type="select" label="Filter Low CPM?" | |
101 help="Treat genes with very low expression as unexpressed and | |
102 filter out to speed up computation."> | |
103 <option value="yes" selected="True">Yes</option> | |
104 <option value="no">No</option> | |
105 </param> | |
106 | |
107 <when value="yes"> | |
108 <param name="cpmReq" type="float" value="0.5" min="0" | |
109 label="Minimum CPM"/> | |
110 | |
111 <param name="sampleReq" type="integer" value="1" min="0" | |
112 label="Minimum Samples" | |
113 help="Filter out all the genes that do not meet the minimum | |
114 CPM in at least this many samples."/> | |
115 </when> | |
116 | |
117 <when value="no"/> | |
118 | |
119 </conditional> | |
120 | |
121 <conditional name="weightCond"> | |
122 <param name="weightOption" type="select" label="Apply sample weights?" | |
123 display="radio" help="Apply weights if outliers are present."> | |
124 | |
125 <option value="no">No</option> | |
126 <option value="yes">Yes</option> | |
127 | |
128 </param> | |
129 </conditional> | |
130 | |
131 <param name="normalisationOption" type="select" | |
132 label="Normalisation Method"> | |
133 | |
134 <option value="TMM">TMM</option> | |
135 <option value="RLE">RLE</option> | |
136 <option value="upperquartile">Upperquartile</option> | |
137 <option value="none">None (Don't normalise)</option> | |
138 | |
139 </param> | |
140 | |
141 <conditional name="testOpt"> | |
142 <param name="wantOpt" type="select" label="Use Advanced Testing Options?" | |
143 help="Enable choices for p-value adjustment method, p-value threshold | |
144 and log2-fold-change threshold."> | |
145 <option value="no" selected="True">No</option> | |
146 <option value="yes">Yes</option> | |
147 </param> | |
148 | |
149 <when value="yes"> | |
150 <param name="pAdjust" type="select" label="P-Value Adjustment Method."> | |
151 <option value="BH">Benjamini and Hochberg (1995)</option> | |
152 <option value="BY">Benjamini and Yekutieli (2001)</option> | |
153 <option value="holm">Holm (1979)</option> | |
154 <option value="none">None</option> | |
155 </param> | |
156 | |
157 <param name="pVal" type="float" value="0.05" min="0" max="1" | |
158 label="Adjusted Threshold" | |
159 help="Genes below this threshold are considered significant and | |
160 highlighted in the MA plot. If either BH(1995) or | |
161 BY(2001) were selected then this value is a | |
162 false-discovery-rate control. If Holm(1979) was selected | |
163 then this is an adjusted p-value for family-wise error | |
164 rate."/> | |
165 | |
166 <param name="lfc" type="float" value="0" min="0" | |
167 label="Minimum log2-fold-change Required" | |
168 help="Genes above this threshold and below the p-value | |
169 threshold are considered significant and highlighted | |
170 in the MA plot."/> | |
171 </when> | |
172 | |
173 <when value="no"/> | |
174 | |
175 </conditional> | |
176 | |
177 <!-- <conditional name="wantRda"> | |
178 <param name="rdaOption" type="select" label="Output RData?" | |
179 display="radio" | |
180 help="Output all the data R used to construct the plots, | |
181 can be loaded into R."> | |
182 | |
183 <option value="no">No</option> | |
184 <option value="yes">Yes</option> | |
185 | |
186 </param> | |
187 </conditional> --> | |
188 </inputs> | |
189 | |
190 <outputs> | |
191 <data format="html" name="outFile" label="Voom Output"/> | |
192 </outputs> | |
193 | |
194 | |
195 <help> | |
196 .. class:: infomark | |
197 | |
198 **What it does** | |
199 | |
200 Given a matrix of counts and optional information about the genes, this tool | |
201 produces plots and tables useful in the analysis of differential gene | |
202 expression. | |
203 | |
204 .. class:: warningmark | |
205 | |
206 This tool is dependent on the R packages limma_ and edgeR_ as a part of the | |
207 bioconductor project. Please ensure that these packages are installed on the | |
208 server running this tool. | |
209 | |
210 ----- | |
211 | |
212 **Counts Data:** | |
213 A matrix of expression level with rows corresponding to particular genes | |
214 and columns corresponding to the feature count in particular samples. | |
215 Values must be tab separated and there must be a row for the sample/column | |
216 labels and a column for the row/gene labels. | |
217 | |
218 Example:: | |
219 | |
220 "GeneID" "Smpl1" "Smpl2" "Smpl3" "Smpl4" "Smpl5" | |
221 "27395" 1699 1528 1463 1441 1495 | |
222 "18777" 1905 1744 1345 1291 1346 | |
223 "15037" 6 8 4 5 5 | |
224 "21399" 2099 1974 1574 1519 1654 | |
225 "58175" 356 312 347 361 346 | |
226 "10866" 2528 2438 1762 1942 2027 | |
227 "12421" 2182 2005 1786 1799 1858 | |
228 "24069" 3 4 2 3 3 | |
229 "31926" 1337 1380 1004 1102 1000 | |
230 "71096" 0 0 2 1 6 | |
231 "59014" 1466 1426 1296 1097 1175 | |
232 ... | |
233 | |
234 **Gene Annotations:** | |
235 Optional input for gene annotations, this can contain more | |
236 information about the genes than just an ID number. The annotations will | |
237 be available in the top differential expression table. | |
238 | |
239 Example:: | |
240 | |
241 "GeneID" "Length" "EntrezID" "Symbols" "GeneName" "Chr" | |
242 "11287" "11287" 4681 "11287" "Pzp" "pregnancy zone protein" "6" | |
243 "11298" "11298" 1455 "11298" "Aanat" "arylalkylamine N-acetyltransferase" "11" | |
244 "11302" "11302" 5743 "11302" "Aatk" "apoptosis-associated tyrosine kinase" "11" | |
245 "11303" "11303" 10260 "11303" "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" "4" | |
246 "11304" "11304" 7248 "11304" "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" "3" | |
247 "11305" "11305" 8061 "11305" "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" "2" | |
248 ... | |
249 | |
250 **Factor Name:** | |
251 The name of the factor being investigated. This tool currently assumes | |
252 that only one factor is of interest. | |
253 | |
254 **Factor Levels:** | |
255 The levels of the factor of interest, this must be entered in the same | |
256 order as the samples to which the levels correspond as listed in the | |
257 columns of the counts matrix. | |
258 | |
259 The values should be separated by commas, and spaces must not be used. | |
260 | |
261 **Contrasts of Interest:** | |
262 The contrasts you wish to make between levels. | |
263 | |
264 Common contrasts would be a simple difference between two levels: "Mut-WT" | |
265 represents the difference between the mutant and wild type genotypes. | |
266 | |
267 The values should be separated by commas, and spaces must not be used. | |
268 | |
269 **Filter Low CPM:** | |
270 Option to ignore the genes that do not show significant levels of | |
271 expression, this filtering is dependent on two criteria: | |
272 | |
273 * **Minimum CPM:** This is the counts per million that a gene must have in at | |
274 least some specified number of samples. | |
275 | |
276 * **Minimum Samples:** This is the number of samples in which the CPM | |
277 requirement must be met in order for that gene to be acknowledged. | |
278 | |
279 Only genes that exhibit a CPM greater than the required amount in at least the | |
280 number of samples specified will be used for analysis. Care should be taken to | |
281 ensure that the sample requirement is appropriate. In the case of an experiment | |
282 with two experimental groups each with two members, if there is a change from | |
283 insignificant cpm to significant cpm but the sample requirement is set to 3, | |
284 then this will cause that gene to fail the criteria. When in doubt simply do not | |
285 filter. | |
286 | |
287 | |
288 **Normalisation Method:** | |
289 Option for using different methods to rescale the raw library | |
290 size. For more information, see the calcNormFactors section in the edgeR_ user's | |
291 manual. | |
292 | |
293 **Apply Sample Weights:** | |
294 Option to downweight outlier samples such that their information is still | |
295 used in the statistical analysis but their impact is reduced. Use this | |
296 whenever significant outliers are present. The MDS plotting tool in this package | |
297 is useful for identifying outliers. | |
298 | |
299 **Use Advanced Testing Options?:** | |
300 By default error rate for multiple testing is controlled using Benjamini and | |
301 Hochberg's false discovery rate control at a threshold value of 0.05. However | |
302 there are options to change this to custom values. | |
303 | |
304 * **P-Value Adjustment Method:** | |
305 Change the multiple testing control method, the options are BH(1995) and | |
306 BY(2001) which are both false discovery rate controls. There is also | |
307 Holm(1979) which is a method for family-wise error rate control. | |
308 | |
309 * **Adjusted Threshold:** | |
310 Set the threshold for the resulting value of the multiple testing control | |
311 method. Only observations whose statistic falls below this value are | |
312 considered significant, thus highlighted in the MA plot. | |
313 | |
314 * **Minimum log2-fold-change Required:** | |
315 In addition to meeting the requirement for the adjusted statistic for | |
316 multiple testing, the observation must have an absolute log2-fold-change | |
317 greater than this threshold to be considered significant, thus highlighted | |
318 in the MA plot. | |
319 | |
320 ----- | |
321 | |
322 **Citations:** | |
323 | |
324 .. class:: infomark | |
325 | |
326 limma | |
327 | |
328 Please cite the paper below for the limma software itself. Please also try | |
329 to cite the appropriate methodology articles that describe the statistical | |
330 methods implemented in limma, depending on which limma functions you are | |
331 using. The methodology articles are listed in Section 2.1 of the limma | |
332 User's Guide. | |
333 | |
334 * Smyth, GK (2005). Limma: linear models for microarray data. In: | |
335 'Bioinformatics and Computational Biology Solutions using R and | |
336 Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry, | |
337 W. Huber (eds), Springer, New York, pages 397-420. | |
338 | |
339 * Law, CW, Chen, Y, Shi, W, and Smyth, GK (2014). Voom: | |
340 precision weights unlock linear model analysis tools for | |
341 RNA-seq read counts. Genome Biology 15, R29. | |
342 | |
343 .. class:: infomark | |
344 | |
345 edgeR | |
346 | |
347 Please cite the first paper for the software itself and the other papers for | |
348 the various original statistical methods implemented in edgeR. See | |
349 Section 1.2 in the User's Guide for more detail. | |
350 | |
351 * Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a Bioconductor | |
352 package for differential expression analysis of digital gene expression | |
353 data. Bioinformatics 26, 139-140 | |
354 | |
355 * Robinson MD and Smyth GK (2007). Moderated statistical tests for assessing | |
356 differences in tag abundance. Bioinformatics 23, 2881-2887 | |
357 | |
358 * Robinson MD and Smyth GK (2008). Small-sample estimation of negative | |
359 binomial dispersion, with applications to SAGE data. | |
360 Biostatistics, 9, 321-332 | |
361 | |
362 * McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis | |
363 of multifactor RNA-Seq experiments with respect to biological variation. | |
364 Nucleic Acids Research 40, 4288-4297 | |
365 | |
366 Report problems to: su.s@wehi.edu.au | |
367 | |
368 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html | |
369 .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html | |
370 | |
371 </help> | |
372 </tool> |