diff edgeR_Differential_Gene_Expression.xml @ 2:ec951a5017f8 draft

planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit a6cf3ec153ca4a3846258a223d287ca125eea7be
author yhoogstrate
date Tue, 01 Sep 2015 09:15:07 -0400
parents a4a4c88783ea
children 12fb0d4b1e93
line wrap: on
line diff
--- a/edgeR_Differential_Gene_Expression.xml	Tue Sep 01 04:59:05 2015 -0400
+++ b/edgeR_Differential_Gene_Expression.xml	Tue Sep 01 09:15:07 2015 -0400
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.11.0.a">
+<tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.11.0.b">
     <description>RNA-Seq gene expression analysis using edgeR (R package)</description>
     
     <macros>
@@ -30,7 +30,7 @@
                description="LOCALE has not been set correctly" />
     </stdio>
     
-    <version_command>echo $(R --version | grep version | grep -v GNU) ", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2&gt; /dev/null | grep -v -i "WARNING: ")</version_command>
+    <version_command>echo $(R --version | grep version | grep -v GNU)", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2&gt; /dev/null | grep -v -i "WARNING: ")</version_command>
     
     <command>
         R --vanilla --slave -f $R_script '--args
@@ -101,11 +101,6 @@
             
             $output_format_images
             '
-            #if $output_R:
-                > $output_R 
-            #else:
-                > /dev/null
-            #end if
     </command>
     
     <configfiles>
@@ -218,39 +213,43 @@
   dge <- estimateGLMTagwiseDisp(dge,design)
   
   
-  if(output_MDSplot_logFC != "/dev/null") {
-    write("Creating MDS plot (logFC method)",stdout())
-    points <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot
+  # hierarchical clustering makes use of the distance of the MDS
+  if(output_MDSplot_logFC != "/dev/null" || output_hierarchical_clustering_plot != "/dev/null") {
+    write("Calculating MDS plot (logFC method)",stdout())
+    mds_distance_logFC <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot
     dev.off()# Kill it
     
-    if(output_format_images == "pdf") {
-      pdf(output_MDSplot_logFC,height=14,width=14)
-    } else if(output_format_images == "svg") {
-      svg(output_MDSplot_logFC,height=14,width=14)
-    } else {
-      ## png(output_MDSplot_logFC)
-      ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
+    if(output_MDSplot_logFC != "/dev/null") {  
+      write("Creating MDS plot (logFC method)",stdout())
+      if(output_format_images == "pdf") {
+        pdf(output_MDSplot_logFC,height=14,width=14)
+      } else if(output_format_images == "svg") {
+        svg(output_MDSplot_logFC,height=14,width=14)
+      } else {
+        ## png(output_MDSplot_logFC)
+        ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
+        
+        bitmap(output_MDSplot_logFC,type="png16m",height=7*3,width=7*3)
+      }
       
-      bitmap(output_MDSplot_logFC,type="png16m",height=14,width=14)
-    }
-    
-    
-    diff_x <- abs(max(points\$x)-min(points\$x))
-    diff_y <-(max(points\$y)-min(points\$y))
-    plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2")
-    points(points\$x,points\$y,pch=20)
-    text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4)
-    rm(diff_x,diff_y)
-    
-    dev.off()
+      diff_x <- abs(max(mds_distance_logFC\$x)-min(mds_distance_logFC\$x))
+      diff_y <-(max(mds_distance_logFC\$y)-min(mds_distance_logFC\$y))
+      plot(c(min(mds_distance_logFC\$x),max(mds_distance_logFC\$x) + 0.45 * diff_x), c(min(mds_distance_logFC\$y) - 0.05 * diff_y,max(mds_distance_logFC\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2")
+      points(mds_distance_logFC\$x,mds_distance_logFC\$y,pch=20)
+      text(mds_distance_logFC\$x, mds_distance_logFC\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4)
+      rm(diff_x,diff_y)
+      
+      dev.off()
+      }
   }
   
+  
   if(output_MDSplot_bcv != "/dev/null") {
     write("Creating MDS plot (bcv method)",stdout())
     
     ## 1. First create a virtual plot to obtain the desired coordinates
     pdf("bcvmds.pdf")
-    points <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples)))
+    mds_distance_BCV <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples)))
     dev.off()# Kill it
     
     ## 2. Re-plot the coordinates in a new figure with the size and settings.
@@ -262,14 +261,14 @@
       ## png(output_MDSplot_bcv)
       ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
       
-      bitmap(output_MDSplot_bcv,type="png16m",height=14,width=14)
+      bitmap(output_MDSplot_bcv,type="png16m",height=7*3,width=7*3)
     }
     
-    diff_x <- abs(max(points\$x)-min(points\$x))
-    diff_y <- (max(points\$y)-min(points\$y))
-    plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2")
-    points(points\$x,points\$y,pch=20)
-    text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4)
+    diff_x <- abs(max(mds_distance_BCV\$x)-min(mds_distance_BCV\$x))
+    diff_y <- (max(mds_distance_BCV\$y)-min(mds_distance_BCV\$y))
+    plot(c(min(mds_distance_BCV\$x),max(mds_distance_BCV\$x) + 0.45 * diff_x), c(min(mds_distance_BCV\$y) - 0.05 * diff_y,max(mds_distance_BCV\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2")
+    points(mds_distance_BCV\$x,mds_distance_BCV\$y,pch=20)
+    text(mds_distance_BCV\$x, mds_distance_BCV\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4)
     rm(diff_x,diff_y)
     
     dev.off()
@@ -287,7 +286,7 @@
       ## png(output_BCVplot)
       ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
       
-      bitmap(output_BCVplot,type="png16m")
+      bitmap(output_BCVplot,type="png16m",width=10.5*3,height=7*3)
     }
     
     plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance")
@@ -303,7 +302,7 @@
   cont <- makeContrasts(contrasts=cont, levels=design)
 
   lrt <- glmLRT(fit, contrast=cont[,1])
-  write(paste("Exporting to file: ",output_count_edgeR,sep=""),stdout())
+  write(paste("Exporting DGE results to file: ",output_count_edgeR,sep=""),stdout())## separator added: sep="" would otherwise glue the path onto the message
   write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
   write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA)
 
@@ -325,7 +324,7 @@
         ## png(output_MAplot)
         ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
         
-        bitmap(output_MAplot,type="png16m")
+        bitmap(output_MAplot,type="png16m",width=10.5*3,height=7*3)
       }
       
       with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance"))
@@ -345,7 +344,7 @@
         ## png(output_PValue_distribution_plot)
         ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
         
-        bitmap(output_PValue_distribution_plot,type="png16m",width=14,height=14)
+        bitmap(output_PValue_distribution_plot,type="png16m",width=7*3,height=7*3)
       }
       
       expressed_genes <- subset(etable, PValue < 0.99)
@@ -374,7 +373,7 @@
       ## png(output_heatmap_plot)
       ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
       
-      bitmap(output_heatmap_plot,type="png16m",width=10.5)
+      bitmap(output_heatmap_plot,type="png16m",width=10.5*3,height=7*3)
     }
     
     etable2 <- topTags(lrt, n=100)\$table
@@ -384,7 +383,24 @@
     dev.off()
   }
   
-  ##output_hierarchical_clustering_plot = args[13]
+  if(output_hierarchical_clustering_plot != "/dev/null") {
+    if(output_format_images == "pdf") {## device is selected by the requested image format, not by the output path
+      pdf(output_hierarchical_clustering_plot,width=10.5)
+    } else if(output_format_images == "svg") {
+      svg(output_hierarchical_clustering_plot,width=10.5)
+    } else {
+      ## png(output_hierarchical_clustering_plot)
+      ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
+      
+      bitmap(output_hierarchical_clustering_plot,type="png16m",width=10.5*3,height=7*3)
+    }
+    ## Cluster the samples on the distance matrix of the logFC MDS object (mds_distance_logFC) computed earlier in this script
+    mds_distance = as.dist(mds_distance_logFC\$distance.matrix)
+    clustering = hclust(mds_distance)
+    plot(clustering,main=paste("Cluster Dendogram on the ",mds_distance_logFC\$top," TopTags",sep=""),sub="\ncomplete linkage on logFC MDS distance")
+    ## 'sub' is an argument of plot(), not of paste(): it is drawn as the subtitle instead of being appended to the title
+    dev.off()
+  }
   
   if(output_RData_obj != "/dev/null") {
     save.image(output_RData_obj)
@@ -411,10 +427,8 @@
             <option value="make_output_BCVplot">BCV-plot</option>
             <option value="make_output_MAplot">MA-plot</option>
             <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option>
-            <option value="make_output_hierarchical_clustering_plot">Hierarchical custering (under contstruction)</option>
+            <option value="make_output_hierarchical_clustering_plot">Hierarchical clustering</option>
             <option value="make_output_heatmap_plot">Heatmap</option>
-            
-            <option value="make_output_R_stdout">R stdout</option>
             <option value="make_output_RData_obj">R Data object</option>
         </param>
         
@@ -595,7 +609,7 @@
 Installation
 ------------
 
-This tool requires no specific configurations. The following dependencies are installed automatically:
+This tool requires no specific configuration. The following dependencies will be installed automatically:
 
 - R
 - limma
@@ -610,28 +624,6 @@
 - edgeR
     - GPL (&gt;=2)
 
-References
-----------
-
-EdgeR
-^^^^^
-**[1] edgeR: a Bioconductor package for differential expression analysis of digital gene expression data.**
-
-*Mark D. Robinson, Davis J. McCarthy and Gordon K. Smyth* - Bioinformatics (2010) 26 (1): 139-140.
-
-- http://www.bioconductor.org/packages/2.12/bioc/html/edgeR.html
-- http://dx.doi.org/10.1093/bioinformatics/btp616
-- http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
-
-Test-data (MCF7)
-^^^^^^^^^^^^^^^^
-**[2] RNA-seq differential expression studies: more sequence or more replication?**
-
-*Yuwen Liu, Jie Zhou and Kevin P. White* - Bioinformatics (2014) 30 (3): 301-304.
-
-- http://www.ncbi.nlm.nih.gov/pubmed/24319002
-- http://dx.doi.org/10.1093/bioinformatics/btt688
-
 @CONTACT@
     </help>