changeset 9:70c0c8757f5f draft

planemo upload commit 9d3e0b226140b566fc529fd0ffe7aa9e8388c6e5-dirty
author proteore
date Fri, 21 Sep 2018 05:32:38 -0400
parents ddaa0c318d65
children e3430084c996
files enrichment_v3.R topGO.xml
diffstat 2 files changed, 42 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/enrichment_v3.R	Fri Mar 23 10:43:32 2018 -0400
+++ b/enrichment_v3.R	Fri Sep 21 05:32:38 2018 -0400
@@ -30,9 +30,10 @@
 #	results file, barplot.png for the barplot image file and dotplot.png for the
 #	dotplot image file 
 
+options(warn=-1)  # Turn off all R warnings for the rest of the script
 
 # loading topGO library
-library(topGO)
+suppressMessages(library(topGO))
 
 # Read file and return file content as data.frame
 readfile = function(filename, header) {
@@ -54,6 +55,11 @@
   return(file)
 } 
 
+check_ens_ids <- function(vector) {
+  ens_pattern = "^(ENS[A-Z]+[0-9]{11}|[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])?|CG[0-9]+|[A-Z0-9]+\\.[0-9]+|YM[A-Z][0-9]{3}[a-z][0-9])$"
+  return(grepl(ens_pattern,vector))
+}
+
 '%!in%' <- function(x,y)!('%in%'(x,y))
 
 
@@ -83,6 +89,10 @@
     stop("Not enough/Too many arguments", call. = FALSE)
 }
 
+#save(options.args,file="/home/dchristiany/proteore_project/ProteoRE/tools/topGO/args.Rda")
+#load("/home/dchristiany/proteore_project/ProteoRE/tools/topGO/args.Rda")
+
+
 typeinput = options.args[1]
 listfile = options.args[2]
 onto = as.character(options.args[3])
@@ -108,8 +118,14 @@
     sample = readfile(listfile, "false")
   }
   sample = sample[,column]
+}
 
+# Check that at least one input ID matches an Ensembl-style gene ID pattern
+if (! any(check_ens_ids(sample))){
+  print("no ensembl gene ids found in your ids list, please check your IDs in input or the selected column of your input file")
+  stop()
 }
+
 # Launch enrichment analysis and return result data from the analysis or the null
 # object if the enrichment could not be done.
 goEnrichment = function(geneuniverse,sample,onto){
@@ -239,11 +255,8 @@
 	count = data$Significant
   
 	labely = paste("GO terms",onto,sep=" ")
-	png(filename="dotplot.png",res=300, width = 3200, height = 3200, units = "px")
-	sp1 = ggplot(data,aes(x=geneRatio,y=goTerms, color=values,size=count)) +geom_point() + scale_colour_gradientn(colours=c("red","violet","blue")) + xlab("Gene Ratio") + ylab(labely) + labs(color="p-values\n") 
-
-	plot(sp1)
-	dev.off()
+	ggplot(data,aes(x=geneRatio,y=goTerms, color=values,size=count)) +geom_point( ) + scale_colour_gradientn(colours=c("red","violet","blue")) + xlab("Gene Ratio") + ylab(labely) + labs(color="p-values\n" ) 
+  ggsave("dotplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm")
 }
 
 createBarPlot = function(data, onto){
@@ -255,12 +268,10 @@
   values = as.numeric(values)
   goTerms = data$Term
 	count = data$Significant
-	png(filename="barplot.png",res=300, width = 3200, height = 3200, units = "px")
 	
 	labely = paste("GO terms",onto,sep=" ")
-  p<-ggplot(data, aes(x=goTerms, y=count,fill=values)) + ylab("Gene count") + xlab(labely) +geom_bar(stat="identity") + scale_fill_gradientn(colours=c("red","violet","blue")) + coord_flip() + labs(fill="p-values\n") 
-	plot(p)
-	dev.off()
+  ggplot(data, aes(x=goTerms, y=count,fill=values,scale(scale = 0.5))) + ylab("Gene count") + xlab(labely) +geom_bar(stat="identity") + scale_fill_gradientn(colours=c("red","violet","blue")) + coord_flip() + labs(fill="p-values\n") 
+  ggsave("barplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm")
 }
 
 
@@ -342,16 +353,15 @@
 	
 		createDotPlot(cut_result, onto)
 	}
-  return(TRUE)
 }
 
 
 
 # Load R library ggplot2 to plot graphs
-library(ggplot2)
+suppressMessages(library(ggplot2))
 
 # Launch enrichment analysis
-allresult = goEnrichment(geneuniverse,sample,onto)
+allresult = suppressMessages(goEnrichment(geneuniverse,sample,onto))
 result = allresult[1][[1]]
 myGOdata = allresult[2][[1]]
 if (!is.null(result)){
--- a/topGO.xml	Fri Mar 23 10:43:32 2018 -0400
+++ b/topGO.xml	Fri Sep 21 05:32:38 2018 -0400
@@ -1,15 +1,20 @@
-<tool id="topGO" name="topGO" version="0.1.0">
+<tool id="topGO" name="topGO" version="2018.09.21">
     <description>
     Enrichment analysis for Gene Ontology
     </description>
     <requirements>
         <requirement type="package" version="3.4.1">R</requirement>
-        <requirement type="package" version="2.2.1">r-ggplot2</requirement>
-        <requirement type="package" version="3.4.1">bioconductor-org.hs.eg.db</requirement>
-        <requirement type="package" version="1.54.0">bioconductor-graph</requirement>
-        <requirement type="package" version="1.38.2">bioconductor-annotationdbi</requirement>
-        <requirement type="package" version="3.4.1">bioconductor-go.db</requirement>
-        <requirement type="package" version="2.28.0">bioconductor-topgo</requirement>
+        <requirement type="package" version="3.0.0">r-ggplot2</requirement>
+        <requirement type="package" version="3.5.0">bioconductor-org.hs.eg.db</requirement>
+        <requirement type="package" version="3.5.0">bioconductor-org.mm.eg.db</requirement>
+        <requirement type="package" version="3.5.0">bioconductor-org.ce.eg.db</requirement>
+        <requirement type="package" version="3.5.0">bioconductor-org.dm.eg.db</requirement>
+        <requirement type="package" version="3.5.0">bioconductor-org.sc.sgd.db</requirement>
+        <!--requirement type="package" version="3.5.0">bioconductor-org.at.tair.db</requirement-->
+        <requirement type="package" version="1.56.0">bioconductor-graph</requirement>
+        <requirement type="package" version="1.40.0">bioconductor-annotationdbi</requirement>
+        <requirement type="package" version="3.5.0">bioconductor-go.db</requirement>
+        <requirement type="package" version="2.30.0">bioconductor-topgo</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" />
@@ -73,7 +78,7 @@
     </when>
     <when value="file_all">
       <param name="genelist" type="data" format="txt,tabular" label="Choose an input file" help="This file must imperatively have 1 column filled with IDs consistent with the database that will be used. Please use the MappingIDs component if this is not the case."/>
-      <param name="column" type="text" label="Please specify the column where you would like to apply the comparison (e.g : Enter c1)" value="c1"/> 
+      <param name="column" type="text" label="Please specify the column where your Ensembl IDs are (e.g : Enter 'c1' for column n°1..)" value="c1"/> 
     
       <param name="header" type="select" label="Does your file have a header?" multiple="false" optional="false"> 
  	<option value="TRUE" selected="true">Yes</option>
@@ -82,12 +87,12 @@
     </when>
   </conditional>
       <param name="geneuniverse" type="select" label="Select a specie">
-        <option value="org.At.tair.db" >Arabidopsis</option>
-        <option value="org.Ce.eg.db" >C.elegans</option>
-        <option value="org.Dm.eg.db" >Fly</option>
-        <option value="org.Hs.eg.db" selected="true">Human</option>
-        <option value="org.Mm.eg.db" >Mouse</option>
-        <option value="org.Sc.sqd.db" >Yeast</option>
+        <!--option value="org.At.tair.db" >Arabidopsis</option-->
+        <option value="org.Ce.eg.db" >Worm (C. elegans)</option>
+        <option value="org.Dm.eg.db" >Fly (D. melanogaster)</option>
+        <option value="org.Hs.eg.db" selected="true">Human (H. sapiens)</option>
+        <option value="org.Mm.eg.db" >Mouse (M. musculus)</option>
+        <option value="org.Sc.sgd.db" >Yeast (S. cerevisiae)</option>
       </param>
 
       <param name="ontocat" type="select" label="Ontology category">