Mercurial > repos > yhoogstrate > edger_with_design_matrix
comparison edgeR_Differential_Gene_Expression.xml @ 2:ec951a5017f8 draft
planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit a6cf3ec153ca4a3846258a223d287ca125eea7be
author | yhoogstrate |
---|---|
date | Tue, 01 Sep 2015 09:15:07 -0400 |
parents | a4a4c88783ea |
children | 12fb0d4b1e93 |
comparison
equal
deleted
inserted
replaced
1:a4a4c88783ea | 2:ec951a5017f8 |
---|---|
1 <?xml version="1.0" encoding="UTF-8"?> | 1 <?xml version="1.0" encoding="UTF-8"?> |
2 <tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.11.0.a"> | 2 <tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.11.0.b"> |
3 <description>RNA-Seq gene expression analysis using edgeR (R package)</description> | 3 <description>RNA-Seq gene expression analysis using edgeR (R package)</description> |
4 | 4 |
5 <macros> | 5 <macros> |
6 <import>edgeR_macros.xml</import> | 6 <import>edgeR_macros.xml</import> |
7 </macros> | 7 </macros> |
28 source="stderr" | 28 source="stderr" |
29 level="warning" | 29 level="warning" |
30 description="LOCALE has not been set correctly" /> | 30 description="LOCALE has not been set correctly" /> |
31 </stdio> | 31 </stdio> |
32 | 32 |
33 <version_command>echo $(R --version | grep version | grep -v GNU) ", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")</version_command> | 33 <version_command>echo $(R --version | grep version | grep -v GNU)", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")</version_command> |
34 | 34 |
35 <command> | 35 <command> |
36 R --vanilla --slave -f $R_script '--args | 36 R --vanilla --slave -f $R_script '--args |
37 $expression_matrix | 37 $expression_matrix |
38 $design_matrix | 38 $design_matrix |
99 /dev/null | 99 /dev/null |
100 #end if | 100 #end if |
101 | 101 |
102 $output_format_images | 102 $output_format_images |
103 ' | 103 ' |
104 #if $output_R: | |
105 > $output_R | |
106 #else: | |
107 > /dev/null | |
108 #end if | |
109 </command> | 104 </command> |
110 | 105 |
111 <configfiles> | 106 <configfiles> |
112 <configfile name="R_script"> | 107 <configfile name="R_script"> |
113 <![CDATA[ | 108 <![CDATA[ |
216 dge <- estimateGLMTrendedDisp(dge,design) | 211 dge <- estimateGLMTrendedDisp(dge,design) |
217 write("Estimating tagwise dispersion...",stdout()) | 212 write("Estimating tagwise dispersion...",stdout()) |
218 dge <- estimateGLMTagwiseDisp(dge,design) | 213 dge <- estimateGLMTagwiseDisp(dge,design) |
219 | 214 |
220 | 215 |
221 if(output_MDSplot_logFC != "/dev/null") { | 216 # hierarchical clustering makes use of the distance of the MDS |
222 write("Creating MDS plot (logFC method)",stdout()) | 217 if(output_MDSplot_logFC != "/dev/null" || output_hierarchical_clustering_plot != "/dev/null") { |
223 points <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot | 218 write("Calculating MDS plot (logFC method)",stdout()) |
219 mds_distance_logFC <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot | |
224 dev.off()# Kill it | 220 dev.off()# Kill it |
225 | 221 |
226 if(output_format_images == "pdf") { | 222 if(output_MDSplot_logFC != "/dev/null") { |
227 pdf(output_MDSplot_logFC,height=14,width=14) | 223 write("Creating MDS plot (logFC method)",stdout()) |
228 } else if(output_format_images == "svg") { | 224 if(output_format_images == "pdf") { |
229 svg(output_MDSplot_logFC,height=14,width=14) | 225 pdf(output_MDSplot_logFC,height=14,width=14) |
230 } else { | 226 } else if(output_format_images == "svg") { |
231 ## png(output_MDSplot_logFC) | 227 svg(output_MDSplot_logFC,height=14,width=14) |
232 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | 228 } else { |
233 | 229 ## png(output_MDSplot_logFC) |
234 bitmap(output_MDSplot_logFC,type="png16m",height=14,width=14) | 230 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ |
235 } | 231 |
236 | 232 bitmap(output_MDSplot_logFC,type="png16m",height=7*3,width=7*3) |
237 | 233 } |
238 diff_x <- abs(max(points\$x)-min(points\$x)) | 234 |
239 diff_y <-(max(points\$y)-min(points\$y)) | 235 diff_x <- abs(max(mds_distance_logFC\$x)-min(mds_distance_logFC\$x)) |
240 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2") | 236 diff_y <-(max(mds_distance_logFC\$y)-min(mds_distance_logFC\$y)) |
241 points(points\$x,points\$y,pch=20) | 237 plot(c(min(mds_distance_logFC\$x),max(mds_distance_logFC\$x) + 0.45 * diff_x), c(min(mds_distance_logFC\$y) - 0.05 * diff_y,max(mds_distance_logFC\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2") |
242 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) | 238 points(mds_distance_logFC\$x,mds_distance_logFC\$y,pch=20) |
243 rm(diff_x,diff_y) | 239 text(mds_distance_logFC\$x, mds_distance_logFC\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) |
244 | 240 rm(diff_x,diff_y) |
245 dev.off() | 241 |
246 } | 242 dev.off() |
243 } | |
244 } | |
245 | |
247 | 246 |
248 if(output_MDSplot_bcv != "/dev/null") { | 247 if(output_MDSplot_bcv != "/dev/null") { |
249 write("Creating MDS plot (bcv method)",stdout()) | 248 write("Creating MDS plot (bcv method)",stdout()) |
250 | 249 |
251 ## 1. First create a virtual plot to obtain the desired coordinates | 250 ## 1. First create a virtual plot to obtain the desired coordinates |
252 pdf("bcvmds.pdf") | 251 pdf("bcvmds.pdf") |
253 points <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples))) | 252 mds_distance_BCV <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples))) |
254 dev.off()# Kill it | 253 dev.off()# Kill it |
255 | 254 |
256 ## 2. Re-plot the coordinates in a new figure with the size and settings. | 255 ## 2. Re-plot the coordinates in a new figure with the size and settings. |
257 if(output_format_images == "pdf") { | 256 if(output_format_images == "pdf") { |
258 pdf(output_MDSplot_bcv,height=14,width=14) | 257 pdf(output_MDSplot_bcv,height=14,width=14) |
260 svg(output_MDSplot_bcv,height=14,width=14) | 259 svg(output_MDSplot_bcv,height=14,width=14) |
261 } else { | 260 } else { |
262 ## png(output_MDSplot_bcv) | 261 ## png(output_MDSplot_bcv) |
263 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | 262 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ |
264 | 263 |
265 bitmap(output_MDSplot_bcv,type="png16m",height=14,width=14) | 264 bitmap(output_MDSplot_bcv,type="png16m",height=7*3,width=7*3) |
266 } | 265 } |
267 | 266 |
268 diff_x <- abs(max(points\$x)-min(points\$x)) | 267 diff_x <- abs(max(mds_distance_BCV\$x)-min(mds_distance_BCV\$x)) |
269 diff_y <- (max(points\$y)-min(points\$y)) | 268 diff_y <- (max(mds_distance_BCV\$y)-min(mds_distance_BCV\$y)) |
270 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2") | 269 plot(c(min(mds_distance_BCV\$x),max(mds_distance_BCV\$x) + 0.45 * diff_x), c(min(mds_distance_BCV\$y) - 0.05 * diff_y,max(mds_distance_BCV\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2") |
271 points(points\$x,points\$y,pch=20) | 270 points(mds_distance_BCV\$x,mds_distance_BCV\$y,pch=20) |
272 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) | 271 text(mds_distance_BCV\$x, mds_distance_BCV\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) |
273 rm(diff_x,diff_y) | 272 rm(diff_x,diff_y) |
274 | 273 |
275 dev.off() | 274 dev.off() |
276 } | 275 } |
277 | 276 |
285 svg(output_BCVplot) | 284 svg(output_BCVplot) |
286 } else { | 285 } else { |
287 ## png(output_BCVplot) | 286 ## png(output_BCVplot) |
288 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | 287 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ |
289 | 288 |
290 bitmap(output_BCVplot,type="png16m") | 289 bitmap(output_BCVplot,type="png16m",width=10.5*3,height=7*3) |
291 } | 290 } |
292 | 291 |
293 plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance") | 292 plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance") |
294 dev.off() | 293 dev.off() |
295 } | 294 } |
301 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout()) | 300 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout()) |
302 cont <- c(contrast) | 301 cont <- c(contrast) |
303 cont <- makeContrasts(contrasts=cont, levels=design) | 302 cont <- makeContrasts(contrasts=cont, levels=design) |
304 | 303 |
305 lrt <- glmLRT(fit, contrast=cont[,1]) | 304 lrt <- glmLRT(fit, contrast=cont[,1]) |
306 write(paste("Exporting to file: ",output_count_edgeR,sep=""),stdout()) | 305 write(paste("Exporting DGE results to file...",output_count_edgeR,sep=""),stdout()) |
307 write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA) | 306 write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA) |
308 write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA) | 307 write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA) |
309 | 308 |
310 ## todo EXPORT FPKM | 309 ## todo EXPORT FPKM |
311 write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA) | 310 write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA) |
323 svg(output_MAplot) | 322 svg(output_MAplot) |
324 } else { | 323 } else { |
325 ## png(output_MAplot) | 324 ## png(output_MAplot) |
326 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | 325 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ |
327 | 326 |
328 bitmap(output_MAplot,type="png16m") | 327 bitmap(output_MAplot,type="png16m",width=10.5*3,height=7*3) |
329 } | 328 } |
330 | 329 |
331 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance")) | 330 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance")) |
332 with(subset(etable, FDR < fdr), points(logCPM, logFC, pch=20, col="red")) | 331 with(subset(etable, FDR < fdr), points(logCPM, logFC, pch=20, col="red")) |
333 abline(h=c(-1,1), col="blue") | 332 abline(h=c(-1,1), col="blue") |
343 svg(output_PValue_distribution_plot,width=14,height=14) | 342 svg(output_PValue_distribution_plot,width=14,height=14) |
344 } else { | 343 } else { |
345 ## png(output_PValue_distribution_plot) | 344 ## png(output_PValue_distribution_plot) |
346 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | 345 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ |
347 | 346 |
348 bitmap(output_PValue_distribution_plot,type="png16m",width=14,height=14) | 347 bitmap(output_PValue_distribution_plot,type="png16m",width=7*3,height=7*3) |
349 } | 348 } |
350 | 349 |
351 expressed_genes <- subset(etable, PValue < 0.99) | 350 expressed_genes <- subset(etable, PValue < 0.99) |
352 h <- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (< 0.99)") | 351 h <- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (< 0.99)") |
353 center <- sum(h\$counts) / length(h\$counts) | 352 center <- sum(h\$counts) / length(h\$counts) |
372 svg(output_heatmap_plot,width=10.5) | 371 svg(output_heatmap_plot,width=10.5) |
373 } else { | 372 } else { |
374 ## png(output_heatmap_plot) | 373 ## png(output_heatmap_plot) |
375 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | 374 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ |
376 | 375 |
377 bitmap(output_heatmap_plot,type="png16m",width=10.5) | 376 bitmap(output_heatmap_plot,type="png16m",width=10.5*3,height=7*3) |
378 } | 377 } |
379 | 378 |
380 etable2 <- topTags(lrt, n=100)\$table | 379 etable2 <- topTags(lrt, n=100)\$table |
381 order <- rownames(etable2) | 380 order <- rownames(etable2) |
382 cpm_sub <- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)[as.numeric(order),] | 381 cpm_sub <- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)[as.numeric(order),] |
383 heatmap(t(cpm_sub)) | 382 heatmap(t(cpm_sub)) |
384 dev.off() | 383 dev.off() |
385 } | 384 } |
386 | 385 |
387 ##output_hierarchical_clustering_plot = args[13] | 386 if(output_hierarchical_clustering_plot != "/dev/null") { |
387 if(output_hierarchical_clustering_plot == "pdf") { | |
388 pdf(output_hierarchical_clustering_plot,width=10.5) | |
389 } else if(output_hierarchical_clustering_plot == "svg") { | |
390 svg(output_hierarchical_clustering_plot,width=10.5) | |
391 } else { | |
392 ## png(output_hierarchical_clustering_plot) | |
393 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | |
394 | |
395 bitmap(output_hierarchical_clustering_plot,type="png16m",width=10.5*3,height=7*3) | |
396 } | |
397 | |
398 mds_distance = as.dist(mds_distance_logFC\$distance.matrix) | |
399 clustering = hclust(mds_distance) | |
400 plot(clustering,main=paste("Cluster Dendogram on the ",mds_distance_logFC\$top," TopTags",sep="",sub="\ncomplete linkage on logFC MDS distance")) | |
401 | |
402 dev.off() | |
403 } | |
388 | 404 |
389 if(output_RData_obj != "/dev/null") { | 405 if(output_RData_obj != "/dev/null") { |
390 save.image(output_RData_obj) | 406 save.image(output_RData_obj) |
391 } | 407 } |
392 | 408 |
409 <option value="make_output_MDSplot_logFC">MDS-plot (logFC-method)</option> | 425 <option value="make_output_MDSplot_logFC">MDS-plot (logFC-method)</option> |
410 <option value="make_output_MDSplot_bcv">MDS-plot (BCV-method; much slower)</option> | 426 <option value="make_output_MDSplot_bcv">MDS-plot (BCV-method; much slower)</option> |
411 <option value="make_output_BCVplot">BCV-plot</option> | 427 <option value="make_output_BCVplot">BCV-plot</option> |
412 <option value="make_output_MAplot">MA-plot</option> | 428 <option value="make_output_MAplot">MA-plot</option> |
413 <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option> | 429 <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option> |
414 <option value="make_output_hierarchical_clustering_plot">Hierarchical custering (under contstruction)</option> | 430 <option value="make_output_hierarchical_clustering_plot">Hierarchical clustering</option> |
415 <option value="make_output_heatmap_plot">Heatmap</option> | 431 <option value="make_output_heatmap_plot">Heatmap</option> |
416 | |
417 <option value="make_output_R_stdout">R stdout</option> | |
418 <option value="make_output_RData_obj">R Data object</option> | 432 <option value="make_output_RData_obj">R Data object</option> |
419 </param> | 433 </param> |
420 | 434 |
421 <param name="output_format_images" type="select" label="Output format of images" display="radio"> | 435 <param name="output_format_images" type="select" label="Output format of images" display="radio"> |
422 <option value="png">Portable network graphics (.png)</option> | 436 <option value="png">Portable network graphics (.png)</option> |
593 - 0.5*(Control+Placebo) / Treated | 607 - 0.5*(Control+Placebo) / Treated |
594 | 608 |
595 Installation | 609 Installation |
596 ------------ | 610 ------------ |
597 | 611 |
598 This tool requires no specific configurations. The following dependencies are installed automatically: | 612 This tool requires no specific configuration. The following dependencies will be installed automatically: |
599 | 613 |
600 - R | 614 - R |
601 - limma | 615 - limma |
602 - edgeR | 616 - edgeR |
603 | 617 |
608 - limma | 622 - limma |
609 - GPL (>=2) | 623 - GPL (>=2) |
610 - edgeR | 624 - edgeR |
611 - GPL (>=2) | 625 - GPL (>=2) |
612 | 626 |
613 References | |
614 ---------- | |
615 | |
616 EdgeR | |
617 ^^^^^ | |
618 **[1] edgeR: a Bioconductor package for differential expression analysis of digital gene expression data.** | |
619 | |
620 *Mark D. Robinson, Davis J. McCarthy and Gordon K. Smyth* - Bioinformatics (2010) 26 (1): 139-140. | |
621 | |
622 - http://www.bioconductor.org/packages/2.12/bioc/html/edgeR.html | |
623 - http://dx.doi.org/10.1093/bioinformatics/btp616 | |
624 - http://www.bioconductor.org/packages/release/bioc/html/edgeR.html | |
625 | |
626 Test-data (MCF7) | |
627 ^^^^^^^^^^^^^^^^ | |
628 **[2] RNA-seq differential expression studies: more sequence or more replication?** | |
629 | |
630 *Yuwen Liu, Jie Zhou and Kevin P. White* - Bioinformatics (2014) 30 (3): 301-304. | |
631 | |
632 - http://www.ncbi.nlm.nih.gov/pubmed/24319002 | |
633 - http://dx.doi.org/10.1093/bioinformatics/btt688 | |
634 | |
635 @CONTACT@ | 627 @CONTACT@ |
636 </help> | 628 </help> |
637 | 629 |
638 <expand macro="citations" /> | 630 <expand macro="citations" /> |
639 </tool> | 631 </tool> |