Mercurial > repos > yhoogstrate > edger_with_design_matrix
diff edgeR_Differential_Gene_Expression.xml @ 2:ec951a5017f8 draft
planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit a6cf3ec153ca4a3846258a223d287ca125eea7be
author | yhoogstrate |
---|---|
date | Tue, 01 Sep 2015 09:15:07 -0400 |
parents | a4a4c88783ea |
children | 12fb0d4b1e93 |
line wrap: on
line diff
--- a/edgeR_Differential_Gene_Expression.xml Tue Sep 01 04:59:05 2015 -0400 +++ b/edgeR_Differential_Gene_Expression.xml Tue Sep 01 09:15:07 2015 -0400 @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.11.0.a"> +<tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.11.0.b"> <description>RNA-Seq gene expression analysis using edgeR (R package)</description> <macros> @@ -30,7 +30,7 @@ description="LOCALE has not been set correctly" /> </stdio> - <version_command>echo $(R --version | grep version | grep -v GNU) ", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")</version_command> + <version_command>echo $(R --version | grep version | grep -v GNU)", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")</version_command> <command> R --vanilla --slave -f $R_script '--args @@ -101,11 +101,6 @@ $output_format_images ' - #if $output_R: - > $output_R - #else: - > /dev/null - #end if </command> <configfiles> @@ -218,39 +213,43 @@ dge <- estimateGLMTagwiseDisp(dge,design) - if(output_MDSplot_logFC != "/dev/null") { - write("Creating MDS plot (logFC method)",stdout()) - points <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot + # hierarchical clustering makes use of the distance of the MDS + if(output_MDSplot_logFC != "/dev/null" || output_hierarchical_clustering_plot != "/dev/null") { + write("Calculating MDS plot (logFC method)",stdout()) + mds_distance_logFC <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot dev.off()# Kill it - if(output_format_images == "pdf") { - pdf(output_MDSplot_logFC,height=14,width=14) - } else if(output_format_images == "svg") { - svg(output_MDSplot_logFC,height=14,width=14) - } else { - ## png(output_MDSplot_logFC) - ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ + if(output_MDSplot_logFC != "/dev/null") { + write("Creating MDS plot (logFC method)",stdout()) + if(output_format_images == "pdf") { + pdf(output_MDSplot_logFC,height=14,width=14) + } else if(output_format_images == "svg") { + svg(output_MDSplot_logFC,height=14,width=14) + } else { + ## png(output_MDSplot_logFC) + ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ + + bitmap(output_MDSplot_logFC,type="png16m",height=7*3,width=7*3) + } - bitmap(output_MDSplot_logFC,type="png16m",height=14,width=14) - } - - - diff_x <- abs(max(points\$x)-min(points\$x)) - diff_y <-(max(points\$y)-min(points\$y)) - plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2") - points(points\$x,points\$y,pch=20) - text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) - rm(diff_x,diff_y) - - dev.off() + diff_x <- abs(max(mds_distance_logFC\$x)-min(mds_distance_logFC\$x)) + diff_y <-(max(mds_distance_logFC\$y)-min(mds_distance_logFC\$y)) + plot(c(min(mds_distance_logFC\$x),max(mds_distance_logFC\$x) + 0.45 * diff_x), c(min(mds_distance_logFC\$y) - 0.05 * diff_y,max(mds_distance_logFC\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2") + points(mds_distance_logFC\$x,mds_distance_logFC\$y,pch=20) + text(mds_distance_logFC\$x, mds_distance_logFC\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) + rm(diff_x,diff_y) + + dev.off() + } } + if(output_MDSplot_bcv != "/dev/null") { write("Creating MDS plot (bcv method)",stdout()) ## 1. First create a virtual plot to obtain the desired coordinates pdf("bcvmds.pdf") - points <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples))) + mds_distance_BCV <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples))) dev.off()# Kill it ## 2. Re-plot the coordinates in a new figure with the size and settings. @@ -262,14 +261,14 @@ ## png(output_MDSplot_bcv) ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ - bitmap(output_MDSplot_bcv,type="png16m",height=14,width=14) + bitmap(output_MDSplot_bcv,type="png16m",height=7*3,width=7*3) } - diff_x <- abs(max(points\$x)-min(points\$x)) - diff_y <- (max(points\$y)-min(points\$y)) - plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2") - points(points\$x,points\$y,pch=20) - text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) + diff_x <- abs(max(mds_distance_BCV\$x)-min(mds_distance_BCV\$x)) + diff_y <- (max(mds_distance_BCV\$y)-min(mds_distance_BCV\$y)) + plot(c(min(mds_distance_BCV\$x),max(mds_distance_BCV\$x) + 0.45 * diff_x), c(min(mds_distance_BCV\$y) - 0.05 * diff_y,max(mds_distance_BCV\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2") + points(mds_distance_BCV\$x,mds_distance_BCV\$y,pch=20) + text(mds_distance_BCV\$x, mds_distance_BCV\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) rm(diff_x,diff_y) dev.off() @@ -287,7 +286,7 @@ ## png(output_BCVplot) ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ - bitmap(output_BCVplot,type="png16m") + bitmap(output_BCVplot,type="png16m",width=10.5*3,height=7*3) } plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance") @@ -303,7 +302,7 @@ cont <- makeContrasts(contrasts=cont, levels=design) lrt <- glmLRT(fit, contrast=cont[,1]) - write(paste("Exporting to file: ",output_count_edgeR,sep=""),stdout()) + write(paste("Exporting DGE results to file...",output_count_edgeR,sep=""),stdout()) write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA) write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA) @@ -325,7 +324,7 @@ ## png(output_MAplot) ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ - bitmap(output_MAplot,type="png16m") + bitmap(output_MAplot,type="png16m",width=10.5*3,height=7*3) } with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance")) @@ -345,7 +344,7 @@ ## png(output_PValue_distribution_plot) ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ - bitmap(output_PValue_distribution_plot,type="png16m",width=14,height=14) + bitmap(output_PValue_distribution_plot,type="png16m",width=7*3,height=7*3) } expressed_genes <- subset(etable, PValue < 0.99) @@ -374,7 +373,7 @@ ## png(output_heatmap_plot) ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ - bitmap(output_heatmap_plot,type="png16m",width=10.5) + bitmap(output_heatmap_plot,type="png16m",width=10.5*3,height=7*3) } etable2 <- topTags(lrt, n=100)\$table @@ -384,7 +383,24 @@ dev.off() } - ##output_hierarchical_clustering_plot = args[13] + if(output_hierarchical_clustering_plot != "/dev/null") { + if(output_hierarchical_clustering_plot == "pdf") { + pdf(output_hierarchical_clustering_plot,width=10.5) + } else if(output_hierarchical_clustering_plot == "svg") { + svg(output_hierarchical_clustering_plot,width=10.5) + } else { + ## png(output_hierarchical_clustering_plot) + ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ + + bitmap(output_hierarchical_clustering_plot,type="png16m",width=10.5*3,height=7*3) + } + + mds_distance = as.dist(mds_distance_logFC\$distance.matrix) + clustering = hclust(mds_distance) + plot(clustering,main=paste("Cluster Dendogram on the ",mds_distance_logFC\$top," TopTags",sep="",sub="\ncomplete linkage on logFC MDS distance")) + + dev.off() + } if(output_RData_obj != "/dev/null") { save.image(output_RData_obj) @@ -411,10 +427,8 @@ <option value="make_output_BCVplot">BCV-plot</option> <option value="make_output_MAplot">MA-plot</option> <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option> - <option value="make_output_hierarchical_clustering_plot">Hierarchical custering (under contstruction)</option> + <option value="make_output_hierarchical_clustering_plot">Hierarchical custering</option> <option value="make_output_heatmap_plot">Heatmap</option> - - <option value="make_output_R_stdout">R stdout</option> <option value="make_output_RData_obj">R Data object</option> </param> @@ -595,7 +609,7 @@ Installation ------------ -This tool requires no specific configurations. The following dependencies are installed automatically: +This tool requires no specific configuration. The following dependencies will installed automatically: - R - limma @@ -610,28 +624,6 @@ - edgeR - GPL (>=2) -References ----------- - -EdgeR -^^^^^ -**[1] edgeR: a Bioconductor package for differential expression analysis of digital gene expression data.** - -*Mark D. Robinson, Davis J. McCarthy and Gordon K. Smyth* - Bioinformatics (2010) 26 (1): 139-140. - -- http://www.bioconductor.org/packages/2.12/bioc/html/edgeR.html -- http://dx.doi.org/10.1093/bioinformatics/btp616 -- http://www.bioconductor.org/packages/release/bioc/html/edgeR.html - -Test-data (MCF7) -^^^^^^^^^^^^^^^^ -**[2] RNA-seq differential expression studies: more sequence or more replication?** - -*Yuwen Liu, Jie Zhou and Kevin P. White* - Bioinformatics (2014) 30 (3): 301-304. - -- http://www.ncbi.nlm.nih.gov/pubmed/24319002 -- http://dx.doi.org/10.1093/bioinformatics/btt688 - @CONTACT@ </help>