comparison edgeR.xml @ 0:91ca33096034 draft

Uploaded
author amawla
date Tue, 13 Jan 2015 21:12:26 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:91ca33096034
1 <tool id="edgeR" name="edgeR" version="0.0.3">
2 <description> - Estimates differential gene expression for short read sequence count using methods appropriate for count data</description>
<!-- R packages the wrapper declares as dependencies. Per the help text, edgeR
     performs the calculations and plots, and limma is additionally used when
     the LIMMA (linear models) analysis type is selected. -->
3 <requirements>
4 <requirement type="R-module">edgeR</requirement>
5 <requirement type="R-module">limma</requirement>
6 </requirements>
<!--
Command-line template (Cheetah directives) executed with the perl interpreter.

Every invocation starts with:
  edgeR.pl -a [analysis] -e [html_file.files_path] -f BH -h [html_file] -o [output]
and ends with the input count matrix ($matrix) as the last argument.

Arguments added per analysis type:
  pw    : -r [rowsumfilter]; "-u movingave -t" only when twd is "TRUE".
  glm   : "-n [norm_exp]" only when export_norm is "true"; always "-d tag";
          the cont_pw flag ("-m" or empty, see the inputs section); then one
          quoted "cnt::[contrast]" argument per entry of the contrasts repeat.
  other : (the remaining option is limma) "-n [norm_exp]" followed by the log
          flag ("-l" or empty) only when export_norm is "true"; then the
          cont_pw flag and the "cnt::[contrast]" arguments as for glm.

NOTE(review): the meaning of each switch is defined by edgeR.pl, which is not
part of this file. The help text suggests BH is the Benjamini and Hochberg FDR
adjustment, movingave the abundance-dependent prior method and "tag" the
tagwise GLM dispersion estimate; confirm against the script.
-->
7 <command interpreter="perl">
8 edgeR.pl -a $analysis_type.analysis -e $html_file.files_path -f BH -h $html_file -o $output
9
10 <!--Pairwise comparisons 1 Factor Analysis-->
11 #if $analysis_type.analysis == "pw":
12 -r $analysis_type.rowsumfilter
13 #if $analysis_type.tagwise_disp.twd == "TRUE":
14 -u movingave
15 -t
16 #end if
17 <!--GLM Generalized Linear Models (Multiple Factors)-->
18 #else if $analysis_type.analysis == "glm":
19 #if $analysis_type.exp.export_norm == "true":
20 -n $norm_exp
21 #end if
22 -d tag
23 $analysis_type.cont_pw
24 #for $cnt in $analysis_type.contrasts:
25 "cnt::${cnt.add_cont}"
26 #end for
27
28 <!--LIMMA Linearized Models (Multiple Factors)-->
29 #else
30 #if $analysis_type.exp.export_norm == "true":
31 -n $norm_exp $analysis_type.exp.log
32 #end if
33 $analysis_type.cont_pw
34 #for $cnt in $analysis_type.contrasts:
35 "cnt::${cnt.add_cont}"
36 #end for
37 #end if
38 $matrix
39
40 </command>
41
<!--
Tool form definition.

matrix        : the tabular count matrix passed as the last command argument.
analysis_type : selects the analysis mode (pw / glm / limma); each mode exposes
                its own parameters, which the command template reads as
                $analysis_type.[name].

Every conditional below declares a "when" case for each option of its test
parameter, including options that add no further inputs, so that no selectable
value is left without a matching case.
-->
42 <inputs>
43 <param name="matrix" type="data" format="tabular" label="Digital Expression Matrix"/>
44 <conditional name="analysis_type">
45 <param name="analysis" type="select" label="Type Of Analysis">
46 <option value="pw">Pairwise comparisons (1 Factor Analysis)</option>
47 <option value="glm" selected="true">Generalized Linear Models (Multiple Factor Analysis using GLM)</option>
48 <option value="limma">Linear Models for RNA-Seq (Multiple Factor Analysis using LIMMA)</option>
49 </param>
<!-- Pairwise mode: rowsum filter plus the tagwise dispersion switch. -->
50 <when value="pw">
51 <param name="rowsumfilter" type="integer" value="5" label="Common Dispersion Rowsum Filter" help="Numeric scalar giving a value for the filtering out of low abundance tags in the estimation of the common dispersion. Only tags with total sum of counts above this value are used in the estimation of the common dispersion. Low abundance tags can adversely affect the estimation of the common dispersion, so this argument allows the user to select an appropriate filter threshold for the tag abundance."/>
52 <conditional name="tagwise_disp">
53 <param name="twd" type="select" label="Maximize the Negative Binomial Weighted Conditional Likelihood" help="Calculate and use an estimate of the dispersion parameter for each tag">
54 <option value="TRUE" selected="true">True</option>
55 <option value="FALSE">False</option>
56 </param>
<!-- Neither choice adds inputs; the cases are declared explicitly. -->
<when value="TRUE"/>
<when value="FALSE"/>
57 </conditional>
58 </when>
<!-- GLM mode: optional all-pairwise flag, user contrasts, optional export of the normalised matrix. -->
59 <when value="glm">
60 <param name="cont_pw" type="boolean" truevalue="-m" falsevalue="" checked="True" label="Perform all pairwise comparisons" help="Include all pairwise comparisons in the contrast matrix."/>
61 <repeat name="contrasts" title="Contrast">
62 <param name="add_cont" title="Contrast" type="text" label="Enter the contrast of interest, e.g. (G1+G2)/2-G3 (no spaces or commas)"/>
63 </repeat>
64 <conditional name="exp">
65 <param name="export_norm" type="select" label="Save Normalised DGE Matrix">
66 <option value="true">Yes</option>
67 <option value="false">No</option>
68 </param>
<!-- GLM export has no extra options; the cases are declared explicitly. -->
<when value="true"/>
<when value="false"/>
69 </conditional>
70 </when>
<!-- LIMMA mode: same as GLM, plus an optional log2 transform of the exported matrix. -->
71 <when value="limma">
72 <param name="cont_pw" type="boolean" truevalue="-m" falsevalue="" checked="True" label="Perform all pairwise comparisons" help="Include all pairwise comparisons in the contrast matrix."/>
73 <repeat name="contrasts" title="Contrast">
74 <param name="add_cont" title="Contrast" type="text" label="Enter the contrast of interest, e.g. (G1+G2)/2-G3 (no spaces or commas)"/>
75 </repeat>
76 <conditional name="exp">
77 <param name="export_norm" type="select" label="Save Normalised DGE Matrix">
78 <option value="true">Yes</option>
79 <option value="false">No</option>
80 </param>
81 <when value="true">
82 <param name="log" type="boolean" truevalue="-l" falsevalue="" checked="True" label="Export Normalised DGE Matrix in Log2" help="Selecting this will log base 2 transform the Normalised Digital Gene Expression Matrix."/>
83 </when>
<!-- No export requested: no further inputs. -->
<when value="false"/>
84 </conditional>
85 </when>
86 </conditional>
87 </inputs>
88
<!--
Datasets produced by the tool:
  output    : tabular results, passed to the script with -o.
  html_file : HTML report, passed with -h; its files_path is passed with -e.
  norm_exp  : normalised expression matrix, passed with -n. It is only created
              when the analysis type is not "pw" and export_norm is "true".
-->
89 <outputs>
90 <data name="output" format="tabular" label="EdgeR analysis on ${matrix.name}"/>
91 <data name="html_file" format="html" label="EdgeR analysis plots for ${matrix.name}"/>
92 <data name="norm_exp" format="tabular" label="EdgeR Norm Expr Matrix for ${matrix.name}">
93 <filter>analysis_type["analysis"] != "pw" and analysis_type["exp"]["export_norm"] == "true"</filter>
94 </data>
95 </outputs>
96
97 <help>
98
99 .. class:: infomark
100
101 **What it does**
102
103 Estimates differential gene expression for short read sequence count using methods appropriate for count data.
104 If you have paired data you may also want to consider Tophat/Cufflinks.
105 Input must be raw count data for each sequence arranged in a rectangular matrix as a tabular file.
106 Note - no scaling - please make sure you have untransformed raw counts of reads for each sequence.
107
108 Performs digital differential gene expression analysis between groups (e.g. a treatment and control).
109 Biological replicates provide information about experimental variability required for reliable inference.
110
111 **What it does not do**
112 edgeR_ requires biological replicates.
113 Without replicates you can't account for known important experimental sources of variability that the approach implemented here requires.
114
115
116 **Input**
117 A count matrix containing sequence names as rows and sample specific counts of reads from this sequence as columns.
118 The matrix must have 2 header rows, the first indicating the group assignment and the second uniquely identifying the samples. It must also contain a unique set of (e.g. Feature) names in the first column.
119
120 Example::
121
122 # G1:Mut G1:Mut G1:Mut G2:WT G2:WT G2:WT
123 #Feature Spl1 Spl2 Spl3 Spl4 Spl5 Spl6
124 NM_001001130 97 43 61 34 73 26
125 NM_001001144 25 8 9 3 5 5
126 NM_001001152 72 45 29 20 31 13
127 NM_001001160 0 1 1 1 0 0
128 NM_001001177 0 1 0 4 3 3
129 NM_001001178 0 2 1 0 4 0
130 NM_001001179 0 0 0 0 0 2
131 NM_001001180 0 0 0 0 0 2
132 NM_001001181 415 319 462 185 391 155
133 NM_001001182 1293 945 987 297 938 496
134 NM_001001183 5 4 11 7 11 2
135 NM_001001184 135 198 178 110 205 64
136 NM_001001185 186 1 0 1 1 0
137 NM_001001186 75 90 91 34 63 54
138 NM_001001187 267 236 170 165 202 51
139 NM_001001295 5 2 6 1 7 0
140 NM_001001309 1 0 0 1 2 1
141 ...
142
143
144 Please use the "Count reads in features with htseq-count" tool to generate the count matrix.
145
146 **Output**
147
148 A tabular file containing relative expression levels, statistical estimates of differential expression probability, R scripts, log, and some helpful diagnostic plots.
149
150 **Fixed Parameters**
151
152 Method for allowing the prior distribution for the dispersion to be abundance-dependent used: movingave
153
154 False discovery rate adjustment method used: Benjamini and Hochberg (1995)
155
156 GLM dispersion estimate used: Tagwise Dispersion
157
158 Gene filter used: less than 1 count per million reads
159
160 .. class:: infomark
161
162 **Attribution**
163 This tool wraps the edgeR_ Bioconductor package so all calculations and plots are controlled by that code. See edgeR_ for all documentation and appropriate attribution.
164 Recommended reference is Mark D. Robinson, Davis J. McCarthy, Gordon K. Smyth, PMCID: PMC2796818
165
166 .. class:: infomark
167
168 **Attribution**
169 When applying the LIMMA (Linear models for RNA-Seq) analysis the tool also makes use of the limma_ Bioconductor package.
170 Recommended reference is Smyth, G. K. (2005). Limma: linear models for microarray data. In: 'Bioinformatics and Computational Biology Solutions using R and Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry, W. Huber (eds), Springer, New York, pages 397--420.
171
172 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
173 .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html
174
175
176 </help>
177
178 </tool>