comparison GO-enrich.R @ 0:bd052861852b draft

planemo upload commit ffa3be72b850aecbfbd636de815967c06a8f643f-dirty
author proteore
date Thu, 01 Mar 2018 10:05:18 -0500
parents
children 710414ebb6db
comparison
equal deleted inserted replaced
-1:000000000000 0:bd052861852b
1 library(clusterProfiler)
2
3 #library(org.Sc.sgd.db)
4 library(org.Hs.eg.db)
5 library(org.Mm.eg.db)
6
7 # Read file and return file content as data.frame?
8 readfile = function(filename, header) {
9 if (header == "true") {
10 # Read only the first line of the files as data (without headers):
11 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE)
12 #Read the data of the files (skipping the first row):
13 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE)
14 # Remove empty rows
15 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
16 #And assign the headers of step two to the data:
17 names(file) <- headers
18 }
19 else {
20 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE)
21 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
22 }
23 return(file)
24 }
25
26 repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) {
27 ggo<-groupGO(gene=geneid,
28 OrgDb = orgdb,
29 ont=ontology,
30 level=level,
31 readable=TRUE)
32 name <- paste("GGO.", ontology, ".png", sep = "")
33 png(name)
34 p <- barplot(ggo)
35 print(p)
36 dev.off()
37 return(ggo)
38 }
39
40 # GO over-representation test
41 enrich.GO <- function(geneid, orgdb, ontology, pval_cutoff, qval_cutoff) {
42 ego<-enrichGO(gene=geneid,
43 OrgDb=orgdb,
44 keytype="ENTREZID",
45 ont=ontology,
46 pAdjustMethod="BH",
47 pvalueCutoff=pval_cutoff,
48 qvalueCutoff=qval_cutoff,
49 readable=TRUE)
50 bar_name <- paste("EGO.", ontology, ".bar.png", sep = "")
51 png(bar_name)
52 p <- barplot(ego)
53 print(p)
54 dev.off()
55 dot_name <- paste("EGO.", ontology, ".dot.png", sep = "")
56 png(dot_name)
57 p <- dotplot(ego)
58 print(p)
59 dev.off()
60 return(ego)
61 }
62
63 clusterProfiler = function() {
64 args <- commandArgs(TRUE)
65 if(length(args)<1) {
66 args <- c("--help")
67 }
68
69 # Help section
70 if("--help" %in% args) {
71 cat("clusterProfiler Enrichment Analysis
72 Arguments:
73 --input_type: type of input (list of id or filename)
74 --input: input
75 --ncol: the column number which you would like to apply...
76 --header: true/false if your file contains a header
77 --id_type: the type of input IDs (UniProt/EntrezID)
78 --species
79 --onto_opt: ontology options
80 --go_function: groupGO/enrichGO
81 --level: 1-3
82 --pval_cutoff
83 --qval_cutoff
84 --text_output: text output filename \n")
85 q(save="no")
86 }
87 # Parse arguments
88 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
89 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
90 args <- as.list(as.character(argsDF$V2))
91 names(args) <- argsDF$V1
92
93 input_type = args$input_type
94 if (input_type == "text") {
95 input = args$input
96 }
97 else if (input_type == "file") {
98 filename = args$input
99 ncol = args$ncol
100 # Check ncol
101 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
102 stop("Please enter an integer for level")
103 }
104 else {
105 ncol = as.numeric(gsub("c", "", ncol))
106 }
107 header = args$header
108 # Get file content
109 file = readfile(filename, header)
110 # Extract Protein IDs list
111 input = c()
112 for (row in as.character(file[,ncol])) {
113 input = c(input, strsplit(row, ";")[[1]][1])
114 }
115 }
116 id_type = args$id_type
117
118
119 #ID format Conversion
120 #This case : from UNIPROT (protein id) to ENTREZ (gene id)
121 #bitr = conversion function from clusterProfiler
122
123 if (args$species=="human") {
124 orgdb<-org.Hs.eg.db
125 }
126 else if (args$species=="mouse") {
127 orgdb<-org.Mm.eg.db
128 }
129 else if (args$species=="rat") {
130 orgdb<-org.Rn.eg.db
131 }
132
133 ##to initialize
134 if (id_type=="Uniprot") {
135 idFrom<-"UNIPROT"
136 idTo<-"ENTREZID"
137 gene<-bitr(input, fromType=idFrom, toType=idTo, OrgDb=orgdb)
138 }
139 else if (id_type=="Entrez") {
140 gene<-input
141 }
142
143 ontology <- strsplit(args$onto_opt, ",")[[1]]
144 if (args$go_represent == "true") {
145 go_represent <- args$go_represent
146 level <- as.numeric(args$level)
147 }
148 if (args$go_enrich == "true") {
149 go_enrich <- args$go_enrich
150 pval_cutoff <- as.numeric(args$pval_cutoff)
151 qval_cutoff <- as.numeric(args$qval_cutoff)
152 }
153
154 ##enrichGO : GO over-representation test
155 for (onto in ontology) {
156 if (args$go_represent == "true") {
157 ggo<-repartition.GO(gene$ENTREZID, orgdb, onto, level, readable=TRUE)
158 write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
159 }
160 if (args$go_enrich == "true") {
161 ego<-enrich.GO(gene$ENTREZID, orgdb, onto, pval_cutoff, qval_cutoff)
162 write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
163 }
164 }
165 }
166
167 clusterProfiler()