comparison edgeR_Differential_Gene_Expression.xml @ 2:ec951a5017f8 draft

planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit a6cf3ec153ca4a3846258a223d287ca125eea7be
author yhoogstrate
date Tue, 01 Sep 2015 09:15:07 -0400
parents a4a4c88783ea
children 12fb0d4b1e93
comparison
equal deleted inserted replaced
1:a4a4c88783ea 2:ec951a5017f8
1 <?xml version="1.0" encoding="UTF-8"?> 1 <?xml version="1.0" encoding="UTF-8"?>
2 <tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.11.0.a"> 2 <tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.11.0.b">
3 <description>RNA-Seq gene expression analysis using edgeR (R package)</description> 3 <description>RNA-Seq gene expression analysis using edgeR (R package)</description>
4 4
5 <macros> 5 <macros>
6 <import>edgeR_macros.xml</import> 6 <import>edgeR_macros.xml</import>
7 </macros> 7 </macros>
28 source="stderr" 28 source="stderr"
29 level="warning" 29 level="warning"
30 description="LOCALE has not been set correctly" /> 30 description="LOCALE has not been set correctly" />
31 </stdio> 31 </stdio>
32 32
33 <version_command>echo $(R --version | grep version | grep -v GNU) ", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2&gt; /dev/null | grep -v -i "WARNING: ")</version_command> 33 <version_command>echo $(R --version | grep version | grep -v GNU)", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2&gt; /dev/null | grep -v -i "WARNING: ")</version_command>
34 34
35 <command> 35 <command>
36 R --vanilla --slave -f $R_script '--args 36 R --vanilla --slave -f $R_script '--args
37 $expression_matrix 37 $expression_matrix
38 $design_matrix 38 $design_matrix
99 /dev/null 99 /dev/null
100 #end if 100 #end if
101 101
102 $output_format_images 102 $output_format_images
103 ' 103 '
104 #if $output_R:
105 > $output_R
106 #else:
107 > /dev/null
108 #end if
109 </command> 104 </command>
110 105
111 <configfiles> 106 <configfiles>
112 <configfile name="R_script"> 107 <configfile name="R_script">
113 <![CDATA[ 108 <![CDATA[
216 dge <- estimateGLMTrendedDisp(dge,design) 211 dge <- estimateGLMTrendedDisp(dge,design)
217 write("Estimating tagwise dispersion...",stdout()) 212 write("Estimating tagwise dispersion...",stdout())
218 dge <- estimateGLMTagwiseDisp(dge,design) 213 dge <- estimateGLMTagwiseDisp(dge,design)
219 214
220 215
221 if(output_MDSplot_logFC != "/dev/null") { 216 # hierarchical clustering makes use of the distance of the MDS
222 write("Creating MDS plot (logFC method)",stdout()) 217 if(output_MDSplot_logFC != "/dev/null" || output_hierarchical_clustering_plot != "/dev/null") {
223 points <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot 218 write("Calculating MDS plot (logFC method)",stdout())
219 mds_distance_logFC <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot
224 dev.off()# Kill it 220 dev.off()# Kill it
225 221
226 if(output_format_images == "pdf") { 222 if(output_MDSplot_logFC != "/dev/null") {
227 pdf(output_MDSplot_logFC,height=14,width=14) 223 write("Creating MDS plot (logFC method)",stdout())
228 } else if(output_format_images == "svg") { 224 if(output_format_images == "pdf") {
229 svg(output_MDSplot_logFC,height=14,width=14) 225 pdf(output_MDSplot_logFC,height=14,width=14)
230 } else { 226 } else if(output_format_images == "svg") {
231 ## png(output_MDSplot_logFC) 227 svg(output_MDSplot_logFC,height=14,width=14)
232 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ 228 } else {
233 229 ## png(output_MDSplot_logFC)
234 bitmap(output_MDSplot_logFC,type="png16m",height=14,width=14) 230 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
235 } 231
236 232 bitmap(output_MDSplot_logFC,type="png16m",height=7*3,width=7*3)
237 233 }
238 diff_x <- abs(max(points\$x)-min(points\$x)) 234
239 diff_y <-(max(points\$y)-min(points\$y)) 235 diff_x <- abs(max(mds_distance_logFC\$x)-min(mds_distance_logFC\$x))
240 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2") 236 diff_y <-(max(mds_distance_logFC\$y)-min(mds_distance_logFC\$y))
241 points(points\$x,points\$y,pch=20) 237 plot(c(min(mds_distance_logFC\$x),max(mds_distance_logFC\$x) + 0.45 * diff_x), c(min(mds_distance_logFC\$y) - 0.05 * diff_y,max(mds_distance_logFC\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2")
242 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) 238 points(mds_distance_logFC\$x,mds_distance_logFC\$y,pch=20)
243 rm(diff_x,diff_y) 239 text(mds_distance_logFC\$x, mds_distance_logFC\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4)
244 240 rm(diff_x,diff_y)
245 dev.off() 241
246 } 242 dev.off()
243 }
244 }
245
247 246
248 if(output_MDSplot_bcv != "/dev/null") { 247 if(output_MDSplot_bcv != "/dev/null") {
249 write("Creating MDS plot (bcv method)",stdout()) 248 write("Creating MDS plot (bcv method)",stdout())
250 249
251 ## 1. First create a virtual plot to obtain the desired coordinates 250 ## 1. First create a virtual plot to obtain the desired coordinates
252 pdf("bcvmds.pdf") 251 pdf("bcvmds.pdf")
253 points <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples))) 252 mds_distance_BCV <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples)))
254 dev.off()# Kill it 253 dev.off()# Kill it
255 254
256 ## 2. Re-plot the coordinates in a new figure with the size and settings. 255 ## 2. Re-plot the coordinates in a new figure with the size and settings.
257 if(output_format_images == "pdf") { 256 if(output_format_images == "pdf") {
258 pdf(output_MDSplot_bcv,height=14,width=14) 257 pdf(output_MDSplot_bcv,height=14,width=14)
260 svg(output_MDSplot_bcv,height=14,width=14) 259 svg(output_MDSplot_bcv,height=14,width=14)
261 } else { 260 } else {
262 ## png(output_MDSplot_bcv) 261 ## png(output_MDSplot_bcv)
263 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ 262 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
264 263
265 bitmap(output_MDSplot_bcv,type="png16m",height=14,width=14) 264 bitmap(output_MDSplot_bcv,type="png16m",height=7*3,width=7*3)
266 } 265 }
267 266
268 diff_x <- abs(max(points\$x)-min(points\$x)) 267 diff_x <- abs(max(mds_distance_BCV\$x)-min(mds_distance_BCV\$x))
269 diff_y <- (max(points\$y)-min(points\$y)) 268 diff_y <- (max(mds_distance_BCV\$y)-min(mds_distance_BCV\$y))
270 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2") 269 plot(c(min(mds_distance_BCV\$x),max(mds_distance_BCV\$x) + 0.45 * diff_x), c(min(mds_distance_BCV\$y) - 0.05 * diff_y,max(mds_distance_BCV\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2")
271 points(points\$x,points\$y,pch=20) 270 points(mds_distance_BCV\$x,mds_distance_BCV\$y,pch=20)
272 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) 271 text(mds_distance_BCV\$x, mds_distance_BCV\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4)
273 rm(diff_x,diff_y) 272 rm(diff_x,diff_y)
274 273
275 dev.off() 274 dev.off()
276 } 275 }
277 276
285 svg(output_BCVplot) 284 svg(output_BCVplot)
286 } else { 285 } else {
287 ## png(output_BCVplot) 286 ## png(output_BCVplot)
288 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ 287 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
289 288
290 bitmap(output_BCVplot,type="png16m") 289 bitmap(output_BCVplot,type="png16m",width=10.5*3,height=7*3)
291 } 290 }
292 291
293 plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance") 292 plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance")
294 dev.off() 293 dev.off()
295 } 294 }
301 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout()) 300 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout())
302 cont <- c(contrast) 301 cont <- c(contrast)
303 cont <- makeContrasts(contrasts=cont, levels=design) 302 cont <- makeContrasts(contrasts=cont, levels=design)
304 303
305 lrt <- glmLRT(fit, contrast=cont[,1]) 304 lrt <- glmLRT(fit, contrast=cont[,1])
306 write(paste("Exporting to file: ",output_count_edgeR,sep=""),stdout()) 305 write(paste("Exporting DGE results to file...",output_count_edgeR,sep=""),stdout())
307 write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA) 306 write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
308 write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA) 307 write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA)
309 308
310 ## todo EXPORT FPKM 309 ## todo EXPORT FPKM
311 write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA) 310 write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA)
323 svg(output_MAplot) 322 svg(output_MAplot)
324 } else { 323 } else {
325 ## png(output_MAplot) 324 ## png(output_MAplot)
326 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ 325 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
327 326
328 bitmap(output_MAplot,type="png16m") 327 bitmap(output_MAplot,type="png16m",width=10.5*3,height=7*3)
329 } 328 }
330 329
331 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance")) 330 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance"))
332 with(subset(etable, FDR < fdr), points(logCPM, logFC, pch=20, col="red")) 331 with(subset(etable, FDR < fdr), points(logCPM, logFC, pch=20, col="red"))
333 abline(h=c(-1,1), col="blue") 332 abline(h=c(-1,1), col="blue")
343 svg(output_PValue_distribution_plot,width=14,height=14) 342 svg(output_PValue_distribution_plot,width=14,height=14)
344 } else { 343 } else {
345 ## png(output_PValue_distribution_plot) 344 ## png(output_PValue_distribution_plot)
346 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ 345 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
347 346
348 bitmap(output_PValue_distribution_plot,type="png16m",width=14,height=14) 347 bitmap(output_PValue_distribution_plot,type="png16m",width=7*3,height=7*3)
349 } 348 }
350 349
351 expressed_genes <- subset(etable, PValue < 0.99) 350 expressed_genes <- subset(etable, PValue < 0.99)
352 h <- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (< 0.99)") 351 h <- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (< 0.99)")
353 center <- sum(h\$counts) / length(h\$counts) 352 center <- sum(h\$counts) / length(h\$counts)
372 svg(output_heatmap_plot,width=10.5) 371 svg(output_heatmap_plot,width=10.5)
373 } else { 372 } else {
374 ## png(output_heatmap_plot) 373 ## png(output_heatmap_plot)
375 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ 374 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
376 375
377 bitmap(output_heatmap_plot,type="png16m",width=10.5) 376 bitmap(output_heatmap_plot,type="png16m",width=10.5*3,height=7*3)
378 } 377 }
379 378
380 etable2 <- topTags(lrt, n=100)\$table 379 etable2 <- topTags(lrt, n=100)\$table
381 order <- rownames(etable2) 380 order <- rownames(etable2)
382 cpm_sub <- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)[as.numeric(order),] 381 cpm_sub <- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)[as.numeric(order),]
383 heatmap(t(cpm_sub)) 382 heatmap(t(cpm_sub))
384 dev.off() 383 dev.off()
385 } 384 }
386 385
387 ##output_hierarchical_clustering_plot = args[13] 386 if(output_hierarchical_clustering_plot != "/dev/null") { ## draw the dendrogram only when its output path is not /dev/null
387 if(output_format_images == "pdf") { ## the graphics device is chosen by the requested image format, not by the output path
388 pdf(output_hierarchical_clustering_plot,width=10.5)
389 } else if(output_format_images == "svg") {
390 svg(output_hierarchical_clustering_plot,width=10.5)
391 } else {
392 ## png(output_hierarchical_clustering_plot)
393 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
394
395 bitmap(output_hierarchical_clustering_plot,type="png16m",width=10.5*3,height=7*3)
396 }
397 ## mds_distance_logFC is produced by the logFC MDS step, which also runs when only this plot is requested
398 mds_distance = as.dist(mds_distance_logFC\$distance.matrix)
399 clustering = hclust(mds_distance)
400 plot(clustering,main=paste("Cluster Dendrogram on the ",mds_distance_logFC\$top," TopTags",sep=""),sub="\ncomplete linkage on logFC MDS distance") ## sub= belongs to plot(); inside paste() it was glued onto the title
401
402 dev.off()
403 }
388 404
389 if(output_RData_obj != "/dev/null") { 405 if(output_RData_obj != "/dev/null") {
390 save.image(output_RData_obj) 406 save.image(output_RData_obj)
391 } 407 }
392 408
409 <option value="make_output_MDSplot_logFC">MDS-plot (logFC-method)</option> 425 <option value="make_output_MDSplot_logFC">MDS-plot (logFC-method)</option>
410 <option value="make_output_MDSplot_bcv">MDS-plot (BCV-method; much slower)</option> 426 <option value="make_output_MDSplot_bcv">MDS-plot (BCV-method; much slower)</option>
411 <option value="make_output_BCVplot">BCV-plot</option> 427 <option value="make_output_BCVplot">BCV-plot</option>
412 <option value="make_output_MAplot">MA-plot</option> 428 <option value="make_output_MAplot">MA-plot</option>
413 <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option> 429 <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option>
414 <option value="make_output_hierarchical_clustering_plot">Hierarchical custering (under contstruction)</option> 430 <option value="make_output_hierarchical_clustering_plot">Hierarchical clustering</option>
415 <option value="make_output_heatmap_plot">Heatmap</option> 431 <option value="make_output_heatmap_plot">Heatmap</option>
416
417 <option value="make_output_R_stdout">R stdout</option>
418 <option value="make_output_RData_obj">R Data object</option> 432 <option value="make_output_RData_obj">R Data object</option>
419 </param> 433 </param>
420 434
421 <param name="output_format_images" type="select" label="Output format of images" display="radio"> 435 <param name="output_format_images" type="select" label="Output format of images" display="radio">
422 <option value="png">Portable network graphics (.png)</option> 436 <option value="png">Portable network graphics (.png)</option>
593 - 0.5*(Control+Placebo) / Treated 607 - 0.5*(Control+Placebo) / Treated
594 608
595 Installation 609 Installation
596 ------------ 610 ------------
597 611
598 This tool requires no specific configurations. The following dependencies are installed automatically: 612 This tool requires no specific configuration. The following dependencies will be installed automatically:
599 613
600 - R 614 - R
601 - limma 615 - limma
602 - edgeR 616 - edgeR
603 617
608 - limma 622 - limma
609 - GPL (&gt;=2) 623 - GPL (&gt;=2)
610 - edgeR 624 - edgeR
611 - GPL (&gt;=2) 625 - GPL (&gt;=2)
612 626
613 References
614 ----------
615
616 EdgeR
617 ^^^^^
618 **[1] edgeR: a Bioconductor package for differential expression analysis of digital gene expression data.**
619
620 *Mark D. Robinson, Davis J. McCarthy and Gordon K. Smyth* - Bioinformatics (2010) 26 (1): 139-140.
621
622 - http://www.bioconductor.org/packages/2.12/bioc/html/edgeR.html
623 - http://dx.doi.org/10.1093/bioinformatics/btp616
624 - http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
625
626 Test-data (MCF7)
627 ^^^^^^^^^^^^^^^^
628 **[2] RNA-seq differential expression studies: more sequence or more replication?**
629
630 *Yuwen Liu, Jie Zhou and Kevin P. White* - Bioinformatics (2014) 30 (3): 301-304.
631
632 - http://www.ncbi.nlm.nih.gov/pubmed/24319002
633 - http://dx.doi.org/10.1093/bioinformatics/btt688
634
635 @CONTACT@ 627 @CONTACT@
636 </help> 628 </help>
637 629
638 <expand macro="citations" /> 630 <expand macro="citations" />
639 </tool> 631 </tool>