Mercurial > repos > yhoogstrate > edger_with_design_matrix
comparison edgeR_Differential_Gene_Expression.xml @ 1:a4a4c88783ea draft
planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit 2700e500a4fb135a20ede7d52221a9d31f1aaa5e-dirty
author | yhoogstrate |
---|---|
date | Tue, 01 Sep 2015 04:59:05 -0400 |
parents | |
children | ec951a5017f8 |
comparison
equal
deleted
inserted
replaced
0:acb56603b268 | 1:a4a4c88783ea |
---|---|
1 <?xml version="1.0" encoding="UTF-8"?> | |
2 <tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.11.0.a"> | |
3 <description>RNA-Seq gene expression analysis using edgeR (R package)</description> | |
4 | |
5 <macros> | |
6 <import>edgeR_macros.xml</import> | |
7 </macros> | |
8 | |
9 <requirements> | |
10 <requirement type="package" version="3.11.0">edger</requirement> | |
11 </requirements> | |
12 | |
13 <stdio> | |
14 <regex match="Error in[^a-z]+contrasts" | |
15 source="both" | |
16 level="fatal" | |
17 description="Have the design- and expression-matrix been swapped?" /> | |
18 <regex match="Execution halted" | |
19 source="both" | |
20 level="fatal" /> | |
21 <regex match="Calculating library sizes from column" | |
22 source="stderr" | |
23 level="log" /> | |
24 <regex match="During startup - Warning messages" | |
25 source="stderr" | |
26 level="log" /> | |
27 <regex match="Setting LC_[^ ]+ failed" | |
28 source="stderr" | |
29 level="warning" | |
30 description="LOCALE has not been set correctly" /> | |
31 </stdio> | |
32 | |
33 <version_command>echo $(R --version | grep version | grep -v GNU) ", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")</version_command> | |
34 | |
35 <command> | |
36 R --vanilla --slave -f $R_script '--args | |
37 $expression_matrix | |
38 $design_matrix | |
39 $contrast | |
40 | |
41 $fdr | |
42 | |
43 $output_count_edgeR | |
44 $output_cpm | |
45 | |
46 /dev/null <!-- Calculation of FPKM/RPKM should come here --> | |
47 | |
48 #if $output_raw_counts: | |
49 $output_raw_counts | |
50 #else: | |
51 /dev/null | |
52 #end if | |
53 | |
54 #if $output_MDSplot_logFC: | |
55 $output_MDSplot_logFC | |
56 #else: | |
57 /dev/null | |
58 #end if | |
59 | |
60 #if $output_MDSplot_bcv: | |
61 $output_MDSplot_bcv | |
62 #else: | |
63 /dev/null | |
64 #end if | |
65 | |
66 #if $output_BCVplot: | |
67 $output_BCVplot | |
68 #else: | |
69 /dev/null | |
70 #end if | |
71 | |
72 #if $output_MAplot: | |
73 $output_MAplot | |
74 #else: | |
75 /dev/null | |
76 #end if | |
77 | |
78 #if $output_PValue_distribution_plot: | |
79 $output_PValue_distribution_plot | |
80 #else: | |
81 /dev/null | |
82 #end if | |
83 | |
84 #if $output_hierarchical_clustering_plot: | |
85 $output_hierarchical_clustering_plot | |
86 #else: | |
87 /dev/null | |
88 #end if | |
89 | |
90 #if $output_heatmap_plot: | |
91 $output_heatmap_plot | |
92 #else: | |
93 /dev/null | |
94 #end if | |
95 | |
96 #if $output_RData_obj: | |
97 $output_RData_obj | |
98 #else: | |
99 /dev/null | |
100 #end if | |
101 | |
102 $output_format_images | |
103 ' | |
104 #if $output_R: | |
105 > $output_R | |
106 #else: | |
107 > /dev/null | |
108 #end if | |
109 </command> | |
110 | |
111 <configfiles> | |
112 <configfile name="R_script"> | |
113 <![CDATA[ | |
114 | |
115 library(limma,quietly=TRUE) ## quietly to avoid unnecessaity stderr messages | |
116 library(edgeR,quietly=TRUE) ## quietly to avoid unnecessaity stderr messages | |
117 library(splines,quietly=TRUE)## quietly to avoid unnecessaity stderr messages | |
118 | |
119 ## Fetch commandline arguments | |
120 args <- commandArgs(trailingOnly = TRUE) | |
121 | |
122 expression_matrix_file <- args[1] | |
123 design_matrix_file <- args[2] | |
124 contrast <- args[3] | |
125 | |
126 fdr <- args[4] | |
127 | |
128 output_count_edgeR <- args[5] | |
129 output_cpm <- args[6] | |
130 | |
131 output_xpkm <- args[7] ##FPKM file - to be implemented | |
132 | |
133 output_raw_counts <- args[8] | |
134 output_MDSplot_logFC <- args[9] | |
135 output_MDSplot_bcv <- args[10] | |
136 output_BCVplot <- args[11] | |
137 output_MAplot <- args[12] | |
138 output_PValue_distribution_plot <- args[13] | |
139 output_hierarchical_clustering_plot <- args[14] | |
140 output_heatmap_plot <- args[15] | |
141 output_RData_obj <- args[16] | |
142 output_format_images <- args[17] | |
143 | |
144 | |
145 ## Obtain read-counts | |
146 expression_matrix <- read.delim(expression_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c("")) | |
147 design_matrix <- read.delim(design_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c("")) | |
148 | |
149 colnames(design_matrix) <- make.names(colnames(design_matrix)) | |
150 | |
151 for(i in 1:ncol(design_matrix)) { | |
152 old <- design_matrix[,i] | |
153 design_matrix[,i] <- make.names(design_matrix[,i]) | |
154 if(paste(design_matrix[,i],collapse="\t") != paste(old,collapse="\t")) { | |
155 print("Renaming of factors:") | |
156 print(old) | |
157 print("To:") | |
158 print(design_matrix[,i]) | |
159 } | |
160 ## The following line seems to malfunction the script: | |
161 ##design_matrix[,i] <- as.factor(design_matrix[,i]) | |
162 } | |
163 | |
164 ## 1) In the expression matrix, you only want to have the samples described in the design matrix | |
165 columns <- match(rownames(design_matrix),colnames(expression_matrix)) | |
166 columns <- columns[!is.na(columns)] | |
167 read_counts <- expression_matrix[,columns] | |
168 | |
169 ## 2) In the design matrix, you only want to have samples of which you really have the counts | |
170 columns <- match(colnames(read_counts),rownames(design_matrix)) | |
171 columns <- columns[!is.na(columns)] | |
172 design_matrix <- design_matrix[columns,,drop=FALSE] | |
173 | |
174 ## Filter for HTSeq predifined counts: | |
175 exclude_HTSeq <- c("no_feature","ambiguous","too_low_aQual","not_aligned","alignment_not_unique") | |
176 exclude_DEXSeq <- c("_ambiguous","_empty","_lowaqual","_notaligned") | |
177 | |
178 exclude <- match(c(exclude_HTSeq, exclude_DEXSeq),rownames(read_counts)) | |
179 exclude <- exclude[is.na(exclude)==0] | |
180 if(length(exclude) != 0) { | |
181 read_counts <- read_counts[-exclude,] | |
182 } | |
183 | |
184 | |
185 ## sorting expression matrix with the order of the read_counts | |
186 ##order <- match(colnames(read_counts) , rownames(design_matrix)) | |
187 ##read_counts_ordered <- read_counts[,order2] | |
188 | |
189 empty_samples <- apply(read_counts,2,function(x) sum(x) == 0) | |
190 if(sum(empty_samples) > 0) { | |
191 write(paste("There are ",sum(empty_samples)," empty samples found:",sep=""),stderr()) | |
192 write(colnames(read_counts)[empty_samples],stderr()) | |
193 } else { | |
194 | |
195 dge <- DGEList(counts=read_counts,genes=rownames(read_counts)) | |
196 | |
197 formula <- paste(c("~0",make.names(colnames(design_matrix))),collapse = " + ") | |
198 design_matrix_tmp <- design_matrix | |
199 colnames(design_matrix_tmp) <- make.names(colnames(design_matrix_tmp)) | |
200 design <- model.matrix(as.formula(formula),design_matrix_tmp) | |
201 rm(design_matrix_tmp) | |
202 | |
203 # Filter prefixes | |
204 prefixes = colnames(design_matrix)[attr(design,"assign")] | |
205 avoid = nchar(prefixes) == nchar(colnames(design)) | |
206 replacements = substr(colnames(design),nchar(prefixes)+1,nchar(colnames(design))) | |
207 replacements[avoid] = colnames(design)[avoid] | |
208 colnames(design) = replacements | |
209 | |
210 # Do normalization | |
211 write("Calculating normalization factors...",stdout()) | |
212 dge <- calcNormFactors(dge) | |
213 write("Estimating common dispersion...",stdout()) | |
214 dge <- estimateGLMCommonDisp(dge,design) | |
215 write("Estimating trended dispersion...",stdout()) | |
216 dge <- estimateGLMTrendedDisp(dge,design) | |
217 write("Estimating tagwise dispersion...",stdout()) | |
218 dge <- estimateGLMTagwiseDisp(dge,design) | |
219 | |
220 | |
221 if(output_MDSplot_logFC != "/dev/null") { | |
222 write("Creating MDS plot (logFC method)",stdout()) | |
223 points <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot | |
224 dev.off()# Kill it | |
225 | |
226 if(output_format_images == "pdf") { | |
227 pdf(output_MDSplot_logFC,height=14,width=14) | |
228 } else if(output_format_images == "svg") { | |
229 svg(output_MDSplot_logFC,height=14,width=14) | |
230 } else { | |
231 ## png(output_MDSplot_logFC) | |
232 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | |
233 | |
234 bitmap(output_MDSplot_logFC,type="png16m",height=14,width=14) | |
235 } | |
236 | |
237 | |
238 diff_x <- abs(max(points\$x)-min(points\$x)) | |
239 diff_y <-(max(points\$y)-min(points\$y)) | |
240 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2") | |
241 points(points\$x,points\$y,pch=20) | |
242 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) | |
243 rm(diff_x,diff_y) | |
244 | |
245 dev.off() | |
246 } | |
247 | |
248 if(output_MDSplot_bcv != "/dev/null") { | |
249 write("Creating MDS plot (bcv method)",stdout()) | |
250 | |
251 ## 1. First create a virtual plot to obtain the desired coordinates | |
252 pdf("bcvmds.pdf") | |
253 points <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples))) | |
254 dev.off()# Kill it | |
255 | |
256 ## 2. Re-plot the coordinates in a new figure with the size and settings. | |
257 if(output_format_images == "pdf") { | |
258 pdf(output_MDSplot_bcv,height=14,width=14) | |
259 } else if(output_format_images == "svg") { | |
260 svg(output_MDSplot_bcv,height=14,width=14) | |
261 } else { | |
262 ## png(output_MDSplot_bcv) | |
263 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | |
264 | |
265 bitmap(output_MDSplot_bcv,type="png16m",height=14,width=14) | |
266 } | |
267 | |
268 diff_x <- abs(max(points\$x)-min(points\$x)) | |
269 diff_y <- (max(points\$y)-min(points\$y)) | |
270 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2") | |
271 points(points\$x,points\$y,pch=20) | |
272 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) | |
273 rm(diff_x,diff_y) | |
274 | |
275 dev.off() | |
276 } | |
277 | |
278 | |
279 if(output_BCVplot != "/dev/null") { | |
280 write("Creating Biological coefficient of variation plot",stdout()) | |
281 | |
282 if(output_format_images == "pdf") { | |
283 pdf(output_BCVplot) | |
284 } else if(output_format_images == "svg") { | |
285 svg(output_BCVplot) | |
286 } else { | |
287 ## png(output_BCVplot) | |
288 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | |
289 | |
290 bitmap(output_BCVplot,type="png16m") | |
291 } | |
292 | |
293 plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance") | |
294 dev.off() | |
295 } | |
296 | |
297 | |
298 write("Fitting GLM...",stdout()) | |
299 fit <- glmFit(dge,design) | |
300 | |
301 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout()) | |
302 cont <- c(contrast) | |
303 cont <- makeContrasts(contrasts=cont, levels=design) | |
304 | |
305 lrt <- glmLRT(fit, contrast=cont[,1]) | |
306 write(paste("Exporting to file: ",output_count_edgeR,sep=""),stdout()) | |
307 write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA) | |
308 write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA) | |
309 | |
310 ## todo EXPORT FPKM | |
311 write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA) | |
312 | |
313 if(output_MAplot != "/dev/null" || output_PValue_distribution_plot != "/dev/null") { | |
314 etable <- topTags(lrt, n=nrow(dge))\$table | |
315 etable <- etable[order(etable\$FDR), ] | |
316 | |
317 if(output_MAplot != "/dev/null") { | |
318 write("Creating MA plot...",stdout()) | |
319 | |
320 if(output_format_images == "pdf") { | |
321 pdf(output_MAplot) | |
322 } else if(output_format_images == "svg") { | |
323 svg(output_MAplot) | |
324 } else { | |
325 ## png(output_MAplot) | |
326 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | |
327 | |
328 bitmap(output_MAplot,type="png16m") | |
329 } | |
330 | |
331 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance")) | |
332 with(subset(etable, FDR < fdr), points(logCPM, logFC, pch=20, col="red")) | |
333 abline(h=c(-1,1), col="blue") | |
334 dev.off() | |
335 } | |
336 | |
337 if(output_PValue_distribution_plot != "/dev/null") { | |
338 write("Creating P-value distribution plot...",stdout()) | |
339 | |
340 if(output_format_images == "pdf") { | |
341 pdf(output_PValue_distribution_plot,width=14,height=14) | |
342 } else if(output_format_images == "svg") { | |
343 svg(output_PValue_distribution_plot,width=14,height=14) | |
344 } else { | |
345 ## png(output_PValue_distribution_plot) | |
346 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | |
347 | |
348 bitmap(output_PValue_distribution_plot,type="png16m",width=14,height=14) | |
349 } | |
350 | |
351 expressed_genes <- subset(etable, PValue < 0.99) | |
352 h <- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (< 0.99)") | |
353 center <- sum(h\$counts) / length(h\$counts) | |
354 lines(c(0,1),c(center,center),lty=2,col="red",lwd=2) | |
355 k <- ksmooth(h\$mid, h\$counts) | |
356 lines(k\$x,k\$y,col="red",lwd=2) | |
357 rmsd <- (h\$counts) - center | |
358 rmsd <- rmsd^2 | |
359 rmsd <- sum(rmsd) | |
360 rmsd <- sqrt(rmsd) | |
361 text(0,max(h\$counts),paste("e=",round(rmsd,2),sep=""),pos=4,col="blue") | |
362 ## change e into epsilon somehow | |
363 dev.off() | |
364 } | |
365 } | |
366 | |
367 if(output_heatmap_plot != "/dev/null") { | |
368 | |
369 if(output_format_images == "pdf") { | |
370 pdf(output_heatmap_plot,width=10.5) | |
371 } else if(output_format_images == "svg") { | |
372 svg(output_heatmap_plot,width=10.5) | |
373 } else { | |
374 ## png(output_heatmap_plot) | |
375 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | |
376 | |
377 bitmap(output_heatmap_plot,type="png16m",width=10.5) | |
378 } | |
379 | |
380 etable2 <- topTags(lrt, n=100)\$table | |
381 order <- rownames(etable2) | |
382 cpm_sub <- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)[as.numeric(order),] | |
383 heatmap(t(cpm_sub)) | |
384 dev.off() | |
385 } | |
386 | |
387 ##output_hierarchical_clustering_plot = args[13] | |
388 | |
389 if(output_RData_obj != "/dev/null") { | |
390 save.image(output_RData_obj) | |
391 } | |
392 | |
393 write("Done!",stdout()) | |
394 } | |
395 ]]> | |
396 </configfile> | |
397 </configfiles> | |
398 | |
399 <inputs> | |
400 <param name="expression_matrix" type="data" format="tabular" label="Expression (read count) matrix" /> | |
401 <param name="design_matrix" type="data" format="tabular" label="Design matrix" help="Ensure your samplenames are identical to those in the expression matrix. Preferentially, create the contrast matrix using 'edgeR: Design- from Expression matrix'." /> | |
402 | |
403 <param name="contrast" type="text" label="Contrast (biological question)" help="e.g. 'tumor-normal' or '(G1+G2)/2-G3' using the factors chosen in the design matrix. Read the 'makeContrasts' manual from Limma package for more info: http://www.bioconductor.org/packages/release/bioc/html/limma.html and http://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf." /> | |
404 | |
405 <param name="fdr" type="float" min="0" max="1" value="0.05" label="False Discovery Rate (FDR)" /> | |
406 | |
407 <param name="outputs" type="select" label="Optional desired outputs" multiple="true" display="checkboxes"> | |
408 <option value="make_output_raw_counts">Raw counts table</option> | |
409 <option value="make_output_MDSplot_logFC">MDS-plot (logFC-method)</option> | |
410 <option value="make_output_MDSplot_bcv">MDS-plot (BCV-method; much slower)</option> | |
411 <option value="make_output_BCVplot">BCV-plot</option> | |
412 <option value="make_output_MAplot">MA-plot</option> | |
413 <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option> | |
414 <option value="make_output_hierarchical_clustering_plot">Hierarchical custering (under contstruction)</option> | |
415 <option value="make_output_heatmap_plot">Heatmap</option> | |
416 | |
417 <option value="make_output_R_stdout">R stdout</option> | |
418 <option value="make_output_RData_obj">R Data object</option> | |
419 </param> | |
420 | |
421 <param name="output_format_images" type="select" label="Output format of images" display="radio"> | |
422 <option value="png">Portable network graphics (.png)</option> | |
423 <option value="pdf">Portable document format (.pdf)</option> | |
424 <option value="svg">Scalable vector graphics (.svg)</option> | |
425 </param> | |
426 </inputs> | |
427 | |
428 <outputs> | |
429 <data format="tabular" name="output_count_edgeR" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - differentially expressed genes" /> | |
430 <data format="tabular" name="output_cpm" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - CPM" /> | |
431 | |
432 <data format="tabular" name="output_raw_counts" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - raw counts"> | |
433 <filter>outputs and ("make_output_raw_counts" in outputs)</filter> | |
434 </data> | |
435 | |
436 <data format="png" name="output_MDSplot_logFC" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot (logFC method)"> | |
437 <filter>outputs and ("make_output_MDSplot_logFC" in outputs)</filter> | |
438 | |
439 <change_format> | |
440 <when input="output_format_images" value="png" format="png" /> | |
441 <when input="output_format_images" value="pdf" format="pdf" /> | |
442 <when input="output_format_images" value="svg" format="svg" /> | |
443 </change_format> | |
444 </data> | |
445 | |
446 <data format="png" name="output_MDSplot_bcv" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot (bcv method)"> | |
447 <filter>outputs and ("make_output_MDSplot_bcv" in outputs)</filter> | |
448 | |
449 <change_format> | |
450 <when input="output_format_images" value="png" format="png" /> | |
451 <when input="output_format_images" value="pdf" format="pdf" /> | |
452 <when input="output_format_images" value="svg" format="svg" /> | |
453 </change_format> | |
454 </data> | |
455 | |
456 <data format="png" name="output_BCVplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - BCV-plot"> | |
457 <filter>outputs and ("make_output_BCVplot" in outputs)</filter> | |
458 | |
459 <change_format> | |
460 <when input="output_format_images" value="png" format="png" /> | |
461 <when input="output_format_images" value="pdf" format="pdf" /> | |
462 <when input="output_format_images" value="svg" format="svg" /> | |
463 </change_format> | |
464 </data> | |
465 | |
466 <data format="png" name="output_MAplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MA-plot"> | |
467 <filter>outputs and ("make_output_MAplot" in outputs)</filter> | |
468 | |
469 <change_format> | |
470 <when input="output_format_images" value="png" format="png" /> | |
471 <when input="output_format_images" value="pdf" format="pdf" /> | |
472 <when input="output_format_images" value="svg" format="svg" /> | |
473 </change_format> | |
474 </data> | |
475 | |
476 <data format="png" name="output_PValue_distribution_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - P-Value distribution"> | |
477 <filter>outputs and ("make_output_PValue_distribution_plot" in outputs)</filter> | |
478 | |
479 <change_format> | |
480 <when input="output_format_images" value="png" format="png" /> | |
481 <when input="output_format_images" value="pdf" format="pdf" /> | |
482 <when input="output_format_images" value="svg" format="svg" /> | |
483 </change_format> | |
484 </data> | |
485 | |
486 <data format="png" name="output_hierarchical_clustering_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Hierarchical custering"> | |
487 <filter>outputs and ("make_output_hierarchical_clustering_plot" in outputs)</filter> | |
488 | |
489 <change_format> | |
490 <when input="output_format_images" value="png" format="png" /> | |
491 <when input="output_format_images" value="pdf" format="pdf" /> | |
492 <when input="output_format_images" value="svg" format="svg" /> | |
493 </change_format> | |
494 </data> | |
495 | |
496 <data format="png" name="output_heatmap_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Heatmap"> | |
497 <filter>outputs and ("make_output_heatmap_plot" in outputs)</filter> | |
498 | |
499 <change_format> | |
500 <when input="output_format_images" value="png" format="png" /> | |
501 <when input="output_format_images" value="pdf" format="pdf" /> | |
502 <when input="output_format_images" value="svg" format="svg" /> | |
503 </change_format> | |
504 </data> | |
505 | |
506 <data format="RData" name="output_RData_obj" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R data object"> | |
507 <filter>outputs and ("make_output_RData_obj" in outputs)</filter> | |
508 </data> | |
509 | |
510 <data format="txt" name="output_R" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R output (debug)" > | |
511 <filter>outputs and ("make_output_R_stdout" in outputs)</filter> | |
512 </data> | |
513 </outputs> | |
514 | |
515 <tests> | |
516 <test> | |
517 <param name="expression_matrix" value="Differential_Gene_Expression/expression_matrix.tabular.txt" /> | |
518 <param name="design_matrix" value="Differential_Gene_Expression/design_matrix.tabular.txt" /> | |
519 | |
520 <param name="contrast" value="E-C"/> | |
521 | |
522 <param name="fdr" value="0.05" /> | |
523 | |
524 <param name="output_format_images" value="png" /> | |
525 | |
526 <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.tabular.txt" /> | |
527 </test> | |
528 </tests> | |
529 | |
530 <help> | |
531 edgeR: Differential Gene(Expression) Analysis | |
532 ############################################# | |
533 | |
534 Overview | |
535 -------- | |
536 Differential expression analysis of RNA-seq and digital gene expression profiles with biological replication. Uses empirical Bayes estimation and exact tests based on the negative binomial distribution. Also useful for differential signal analysis with other types of genome-scale count data [1]. | |
537 | |
538 For every experiment, the algorithm requires a design matrix. This matrix describes which samples belong to which groups. | |
539 More details on this are given in the edgeR manual: http://www.bioconductor.org/packages/2.12/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf | |
540 and the limma manual. | |
541 | |
542 Because the creation of a design matrix can be complex and time consuming, especially if no GUI is used, this package comes with an alternative tool which can help you with it. | |
543 This tool is called *edgeR Design Matrix Creator*. | |
544 If the appropriate design matrix (with corresponding links to the files) is given, | |
545 the correct contrast ( http://en.wikipedia.org/wiki/Contrast_(statistics) ) has to be given. | |
546 | |
547 If you have for example two groups, with an equal weight, you would like to compare either | |
548 "g1-g2" or "normal-cancer". | |
549 | |
550 The test function makes use of a MCF7 dataset used in a study that indicates that a higher sequencing depth is not neccesairily more important than a higher amount of replaciates[2]. | |
551 | |
552 Input | |
553 ----- | |
554 Expression matrix | |
555 ^^^^^^^^^^^^^^^^^ | |
556 :: | |
557 | |
558 Geneid "\t" Sample-1 "\t" Sample-2 "\t" Sample-3 "\t" Sample-4 [...] "\n" | |
559 SMURF "\t" 123 "\t" 21 "\t" 34545 "\t" 98 ... "\n" | |
560 BRCA1 "\t" 435 "\t" 6655 "\t" 45 "\t" 55 ... "\n" | |
561 LINK33 "\t" 4 "\t" 645 "\t" 345 "\t" 1 ... "\n" | |
562 SNORD78 "\t" 498 "\t" 65 "\t" 98 "\t" 27 ... "\n" | |
563 [...] | |
564 | |
565 *Note: Make sure the number of columns in the header is identical to the number of columns in the body.* | |
566 | |
567 Design matrix | |
568 ^^^^^^^^^^^^^ | |
569 :: | |
570 | |
571 Sample "\t" Condition "\t" Ethnicity "\t" Patient "\t" Batch "\n" | |
572 Sample-1 "\t" Tumor "\t" European "\t" 1 "\t" 1 "\n" | |
573 Sample-2 "\t" Normal "\t" European "\t" 1 "\t" 1 "\n" | |
574 Sample-3 "\t" Tumor "\t" European "\t" 2 "\t" 1 "\n" | |
575 Sample-4 "\t" Normal "\t" European "\t" 2 "\t" 1 "\n" | |
576 Sample-5 "\t" Tumor "\t" African "\t" 3 "\t" 1 "\n" | |
577 Sample-6 "\t" Normal "\t" African "\t" 3 "\t" 1 "\n" | |
578 Sample-7 "\t" Tumor "\t" African "\t" 4 "\t" 2 "\n" | |
579 Sample-8 "\t" Normal "\t" African "\t" 4 "\t" 2 "\n" | |
580 Sample-9 "\t" Tumor "\t" Asian "\t" 5 "\t" 2 "\n" | |
581 Sample-10 "\t" Normal "\t" Asian "\t" 5 "\t" 2 "\n" | |
582 Sample-11 "\t" Tumor "\t" Asian "\t" 6 "\t" 2 "\n" | |
583 Sample-12 "\t" Normal "\t" Asian "\t" 6 "\t" 2 "\n" | |
584 | |
585 *Note: Avoid factor names that are (1) numerical, (2) contain mathematical symbols and preferebly only use letters.* | |
586 | |
587 Contrast | |
588 ^^^^^^^^ | |
589 The contrast represents the biological question. There can be many questions asked, e.g.: | |
590 | |
591 - Tumor-Normal | |
592 - African-European | |
593 - 0.5*(Control+Placebo) / Treated | |
594 | |
595 Installation | |
596 ------------ | |
597 | |
598 This tool requires no specific configurations. The following dependencies are installed automatically: | |
599 | |
600 - R | |
601 - limma | |
602 - edgeR | |
603 | |
604 License | |
605 ------- | |
606 - R | |
607 - GPL 2 & GPL 3 | |
608 - limma | |
609 - GPL (>=2) | |
610 - edgeR | |
611 - GPL (>=2) | |
612 | |
613 References | |
614 ---------- | |
615 | |
616 EdgeR | |
617 ^^^^^ | |
618 **[1] edgeR: a Bioconductor package for differential expression analysis of digital gene expression data.** | |
619 | |
620 *Mark D. Robinson, Davis J. McCarthy and Gordon K. Smyth* - Bioinformatics (2010) 26 (1): 139-140. | |
621 | |
622 - http://www.bioconductor.org/packages/2.12/bioc/html/edgeR.html | |
623 - http://dx.doi.org/10.1093/bioinformatics/btp616 | |
624 - http://www.bioconductor.org/packages/release/bioc/html/edgeR.html | |
625 | |
626 Test-data (MCF7) | |
627 ^^^^^^^^^^^^^^^^ | |
628 **[2] RNA-seq differential expression studies: more sequence or more replication?** | |
629 | |
630 *Yuwen Liu, Jie Zhou and Kevin P. White* - Bioinformatics (2014) 30 (3): 301-304. | |
631 | |
632 - http://www.ncbi.nlm.nih.gov/pubmed/24319002 | |
633 - http://dx.doi.org/10.1093/bioinformatics/btt688 | |
634 | |
635 @CONTACT@ | |
636 </help> | |
637 | |
638 <expand macro="citations" /> | |
639 </tool> |