Mercurial > repos > proteore > proteore_topgo
comparison enrichment_v3.R @ 9:70c0c8757f5f draft
planemo upload commit 9d3e0b226140b566fc529fd0ffe7aa9e8388c6e5-dirty
author | proteore |
---|---|
date | Fri, 21 Sep 2018 05:32:38 -0400 |
parents | ddaa0c318d65 |
children |
comparison
equal
deleted
inserted
replaced
8:ddaa0c318d65 | 9:70c0c8757f5f |
---|---|
28 # OUTPUT : | 28 # OUTPUT : |
29 # - outputs commanded by the user named respectively result.tsv for the text | 29 # - outputs commanded by the user named respectively result.tsv for the text |
30 # results file, barplot.png for the barplot image file and dotplot.png for the | 30 # results file, barplot.png for the barplot image file and dotplot.png for the |
31 # dotplot image file | 31 # dotplot image file |
32 | 32 |
33 options(warn=-1) #TURN OFF WARNINGS !!!!!! | |
33 | 34 |
34 # loading topGO library | 35 # loading topGO library |
35 library(topGO) | 36 suppressMessages(library(topGO)) |
36 | 37 |
37 # Read file and return file content as data.frame | 38 # Read file and return file content as data.frame |
38 readfile = function(filename, header) { | 39 readfile = function(filename, header) { |
39 if (header == "true") { | 40 if (header == "true") { |
40 # Read only first line of the file as header: | 41 # Read only first line of the file as header: |
52 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] | 53 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] |
53 } | 54 } |
54 return(file) | 55 return(file) |
55 } | 56 } |
56 | 57 |
58 check_ens_ids <- function(vector) { | |
59 ens_pattern = "^(ENS[A-Z]+[0-9]{11}|[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])?|CG[0-9]+|[A-Z0-9]+\\.[0-9]+|YM[A-Z][0-9]{3}[a-z][0-9])$" | |
60 return(grepl(ens_pattern,vector)) | |
61 } | |
62 | |
57 '%!in%' <- function(x,y)!('%in%'(x,y)) | 63 '%!in%' <- function(x,y)!('%in%'(x,y)) |
58 | 64 |
59 | 65 |
60 # Parse command line arguments | 66 # Parse command line arguments |
61 | 67 |
80 | 86 |
81 | 87 |
82 if (length(options.args) != 12) { | 88 if (length(options.args) != 12) { |
83 stop("Not enough/Too many arguments", call. = FALSE) | 89 stop("Not enough/Too many arguments", call. = FALSE) |
84 } | 90 } |
91 | |
92 #save(options.args,file="/home/dchristiany/proteore_project/ProteoRE/tools/topGO/args.Rda") | |
93 #load("/home/dchristiany/proteore_project/ProteoRE/tools/topGO/args.Rda") | |
94 | |
85 | 95 |
86 typeinput = options.args[1] | 96 typeinput = options.args[1] |
87 listfile = options.args[2] | 97 listfile = options.args[2] |
88 onto = as.character(options.args[3]) | 98 onto = as.character(options.args[3]) |
89 option = as.character(options.args[4]) | 99 option = as.character(options.args[4]) |
106 sample = readfile(listfile, "true") | 116 sample = readfile(listfile, "true") |
107 }else{ | 117 }else{ |
108 sample = readfile(listfile, "false") | 118 sample = readfile(listfile, "false") |
109 } | 119 } |
110 sample = sample[,column] | 120 sample = sample[,column] |
111 | 121 } |
112 } | 122 |
123 #check of ENS ids | |
124 if (! any(check_ens_ids(sample))){ | |
125 print("no ensembl gene ids found in your ids list, please check your IDs in input or the selected column of your input file") | |
126 stop() | |
127 } | |
128 | |
113 # Launch enrichment analysis and return result data from the analysis or the null | 129 # Launch enrichment analysis and return result data from the analysis or the null |
114 # object if the enrichment could not be done. | 130 # object if the enrichment could not be done. |
115 goEnrichment = function(geneuniverse,sample,onto){ | 131 goEnrichment = function(geneuniverse,sample,onto){ |
116 | 132 |
117 # get all the GO terms of the corresponding ontology (BP/CC/MF) and all their | 133 # get all the GO terms of the corresponding ontology (BP/CC/MF) and all their |
237 geneRatio = data$Significant/data$Annotated | 253 geneRatio = data$Significant/data$Annotated |
238 goTerms = data$Term | 254 goTerms = data$Term |
239 count = data$Significant | 255 count = data$Significant |
240 | 256 |
241 labely = paste("GO terms",onto,sep=" ") | 257 labely = paste("GO terms",onto,sep=" ") |
242 png(filename="dotplot.png",res=300, width = 3200, height = 3200, units = "px") | 258 ggplot(data,aes(x=geneRatio,y=goTerms, color=values,size=count)) +geom_point( ) + scale_colour_gradientn(colours=c("red","violet","blue")) + xlab("Gene Ratio") + ylab(labely) + labs(color="p-values\n" ) |
243 sp1 = ggplot(data,aes(x=geneRatio,y=goTerms, color=values,size=count)) +geom_point() + scale_colour_gradientn(colours=c("red","violet","blue")) + xlab("Gene Ratio") + ylab(labely) + labs(color="p-values\n") | 259 ggsave("dotplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm") |
244 | |
245 plot(sp1) | |
246 dev.off() | |
247 } | 260 } |
248 | 261 |
249 createBarPlot = function(data, onto){ | 262 createBarPlot = function(data, onto){ |
250 | 263 |
251 | 264 |
253 values = roundValues(values) | 266 values = roundValues(values) |
254 | 267 |
255 values = as.numeric(values) | 268 values = as.numeric(values) |
256 goTerms = data$Term | 269 goTerms = data$Term |
257 count = data$Significant | 270 count = data$Significant |
258 png(filename="barplot.png",res=300, width = 3200, height = 3200, units = "px") | |
259 | 271 |
260 labely = paste("GO terms",onto,sep=" ") | 272 labely = paste("GO terms",onto,sep=" ") |
261 p<-ggplot(data, aes(x=goTerms, y=count,fill=values)) + ylab("Gene count") + xlab(labely) +geom_bar(stat="identity") + scale_fill_gradientn(colours=c("red","violet","blue")) + coord_flip() + labs(fill="p-values\n") | 273 ggplot(data, aes(x=goTerms, y=count,fill=values,scale(scale = 0.5))) + ylab("Gene count") + xlab(labely) +geom_bar(stat="identity") + scale_fill_gradientn(colours=c("red","violet","blue")) + coord_flip() + labs(fill="p-values\n") |
262 plot(p) | 274 ggsave("barplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm") |
263 dev.off() | |
264 } | 275 } |
265 | 276 |
266 | 277 |
267 # Produce the different outputs | 278 # Produce the different outputs |
268 createOutputs = function(result, cut_result,text, barplot, dotplot, onto){ | 279 createOutputs = function(result, cut_result,text, barplot, dotplot, onto){ |
340 | 351 |
341 if (dotplot=="TRUE"){ | 352 if (dotplot=="TRUE"){ |
342 | 353 |
343 createDotPlot(cut_result, onto) | 354 createDotPlot(cut_result, onto) |
344 } | 355 } |
345 return(TRUE) | |
346 } | 356 } |
347 | 357 |
348 | 358 |
349 | 359 |
350 # Load R library ggplot2 to plot graphs | 360 # Load R library ggplot2 to plot graphs |
351 library(ggplot2) | 361 suppressMessages(library(ggplot2)) |
352 | 362 |
353 # Launch enrichment analysis | 363 # Launch enrichment analysis |
354 allresult = goEnrichment(geneuniverse,sample,onto) | 364 allresult = suppressMessages(goEnrichment(geneuniverse,sample,onto)) |
355 result = allresult[1][[1]] | 365 result = allresult[1][[1]] |
356 myGOdata = allresult[2][[1]] | 366 myGOdata = allresult[2][[1]] |
357 if (!is.null(result)){ | 367 if (!is.null(result)){ |
358 | 368 |
359 # Adjust the result with a multiple testing correction or not and with the user | 369 # Adjust the result with a multiple testing correction or not and with the user |