comparison GO-enrich.R @ 6:5e16cec55146 draft

planemo upload commit 2da0aec067fd35a8ec102ce27ec4bac8f54b1c30-dirty
author proteore
date Thu, 29 Mar 2018 11:43:28 -0400
parents 8a91f58782df
children 4609346d8108
comparison
equal deleted inserted replaced
5:8a91f58782df 6:5e16cec55146
37 dev.off() 37 dev.off()
38 return(ggo) 38 return(ggo)
39 } 39 }
40 40
41 # GO over-representation test 41 # GO over-representation test
42 enrich.GO <- function(geneid, orgdb, ontology, pval_cutoff, qval_cutoff) { 42 enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff) {
43 ego<-enrichGO(gene=geneid, 43 ego<-enrichGO(gene=geneid,
44 universe=universe,
44 OrgDb=orgdb, 45 OrgDb=orgdb,
45 keytype="ENTREZID", 46 keytype="ENTREZID",
46 ont=ontology, 47 ont=ontology,
47 pAdjustMethod="BH", 48 pAdjustMethod="BH",
48 pvalueCutoff=pval_cutoff, 49 pvalueCutoff=pval_cutoff,
49 qvalueCutoff=qval_cutoff, 50 qvalueCutoff=qval_cutoff,
50 readable=TRUE) 51 readable=TRUE)
52 # Plot bar & dot plots
51 bar_name <- paste("EGO.", ontology, ".bar.png", sep = "") 53 bar_name <- paste("EGO.", ontology, ".bar.png", sep = "")
52 png(bar_name) 54 png(bar_name)
53 p <- barplot(ego) 55 p <- barplot(ego)
54 print(p) 56 print(p)
55 dev.off() 57 dev.off()
71 if("--help" %in% args) { 73 if("--help" %in% args) {
72 cat("clusterProfiler Enrichment Analysis 74 cat("clusterProfiler Enrichment Analysis
73 Arguments: 75 Arguments:
74 --input_type: type of input (list of id or filename) 76 --input_type: type of input (list of id or filename)
75 --input: input 77 --input: input
76 --ncol: the column number which you would like to apply... 78 --ncol: the column number which contains list of input IDs
77 --header: true/false if your file contains a header 79 --header: true/false if your file contains a header
78 --id_type: the type of input IDs (UniProt/EntrezID) 80 --id_type: the type of input IDs (UniProt/EntrezID)
81 --universe_type: list or filename
82 --universe: background IDs list
83 --uncol: the column number which contains background IDs list
84 --uheader: true/false if the background IDs file contains header
85 --universe_id_type: the type of universe IDs (UniProt/EntrezID)
79 --species 86 --species
80 --onto_opt: ontology options 87 --onto_opt: ontology options
81 --go_function: groupGO/enrichGO 88 --go_function: groupGO/enrichGO
82 --level: 1-3 89 --level: 1-3
83 --pval_cutoff 90 --pval_cutoff
88 # Parse arguments 95 # Parse arguments
89 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") 96 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
90 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) 97 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
91 args <- as.list(as.character(argsDF$V2)) 98 args <- as.list(as.character(argsDF$V2))
92 names(args) <- argsDF$V1 99 names(args) <- argsDF$V1
93 100 #print(args)
101
102 # Extract OrgDb
103 if (args$species=="human") {
104 orgdb<-org.Hs.eg.db
105 }
106 else if (args$species=="mouse") {
107 orgdb<-org.Mm.eg.db
108 }
109 else if (args$species=="rat") {
110 orgdb<-org.Rn.eg.db
111 }
112
113 # Extract input IDs
94 input_type = args$input_type 114 input_type = args$input_type
95 if (input_type == "text") { 115 if (input_type == "text") {
96 input = strsplit(args$input, "[ \t\n]+")[[1]] 116 input = strsplit(args$input, "[ \t\n]+")[[1]]
97 } 117 }
98 else if (input_type == "file") { 118 else if (input_type == "file") {
99 filename = args$input 119 filename = args$input
100 ncol = args$ncol 120 ncol = args$ncol
101 # Check ncol 121 # Check ncol
102 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { 122 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
103 stop("Please enter an integer for level") 123 stop("Please enter the right format for column number: c[number]")
104 } 124 }
105 else { 125 else {
106 ncol = as.numeric(gsub("c", "", ncol)) 126 ncol = as.numeric(gsub("c", "", ncol))
107 } 127 }
108 header = args$header 128 header = args$header
113 for (row in as.character(file[,ncol])) { 133 for (row in as.character(file[,ncol])) {
114 input = c(input, strsplit(row, ";")[[1]][1]) 134 input = c(input, strsplit(row, ";")[[1]][1])
115 } 135 }
116 } 136 }
117 id_type = args$id_type 137 id_type = args$id_type
118 138 ## Get input gene list from input IDs
119
120 #ID format Conversion 139 #ID format Conversion
121 #This case : from UNIPROT (protein id) to ENTREZ (gene id) 140 #This case : from UNIPROT (protein id) to ENTREZ (gene id)
122 #bitr = conversion function from clusterProfiler 141 #bitr = conversion function from clusterProfiler
123
124 if (args$species=="human") {
125 orgdb<-org.Hs.eg.db
126 }
127 else if (args$species=="mouse") {
128 orgdb<-org.Mm.eg.db
129 }
130 else if (args$species=="rat") {
131 orgdb<-org.Rn.eg.db
132 }
133
134 ##to initialize
135 if (id_type=="Uniprot") { 142 if (id_type=="Uniprot") {
136 idFrom<-"UNIPROT" 143 idFrom<-"UNIPROT"
137 idTo<-"ENTREZID" 144 idTo<-"ENTREZID"
138 gene<-bitr(input, fromType=idFrom, toType=idTo, OrgDb=orgdb) 145 gene<-bitr(input, fromType=idFrom, toType=idTo, OrgDb=orgdb)
146 gene<-unique(gene$ENTREZID)
139 } 147 }
140 else if (id_type=="Entrez") { 148 else if (id_type=="Entrez") {
141 gene<-input 149 gene<-unique(input)
142 } 150 }
143 151
144 ontology <- strsplit(args$onto_opt, ",")[[1]] 152 ontology <- strsplit(args$onto_opt, ",")[[1]]
153 ## Extract GGO/EGO arguments
145 if (args$go_represent == "true") { 154 if (args$go_represent == "true") {
146 go_represent <- args$go_represent 155 go_represent <- args$go_represent
147 level <- as.numeric(args$level) 156 level <- as.numeric(args$level)
148 } 157 }
149 if (args$go_enrich == "true") { 158 if (args$go_enrich == "true") {
150 go_enrich <- args$go_enrich 159 go_enrich <- args$go_enrich
151 pval_cutoff <- as.numeric(args$pval_cutoff) 160 pval_cutoff <- as.numeric(args$pval_cutoff)
152 qval_cutoff <- as.numeric(args$qval_cutoff) 161 qval_cutoff <- as.numeric(args$qval_cutoff)
162 # Extract universe background genes (same as input file)
163 if (!is.null(args$universe_type)) {
164 universe_type = args$universe_type
165 if (universe_type == "text") {
166 universe = strsplit(args$universe, "[ \t\n]+")[[1]]
167 }
168 else if (universe_type == "file") {
169 universe_filename = args$universe
170 universe_ncol = args$uncol
171 # Check ncol
172 if (! as.numeric(gsub("c", "", universe_ncol)) %% 1 == 0) {
173 stop("Please enter the right format for column number: c[number]")
174 }
175 else {
176 universe_ncol = as.numeric(gsub("c", "", universe_ncol))
177 }
178 universe_header = args$uheader
179 # Get file content
180 universe_file = readfile(universe_filename, universe_header)
181 # Extract Protein IDs list
182 universe = c()
183 for (row in as.character(universe_file[,universe_ncol])) {
184 universe = c(universe, strsplit(row, ";")[[1]][1])
185 }
186 }
187 universe_id_type = args$universe_id_type
188 ##to initialize
189 if (universe_id_type=="Uniprot") {
190 idFrom<-"UNIPROT"
191 idTo<-"ENTREZID"
192 universe_gene<-bitr(universe, fromType=idFrom, toType=idTo, OrgDb=orgdb)
193 universe_gene<-unique(universe_gene$ENTREZID)
194 }
195 else if (universe_id_type=="Entrez") {
196 universe_gene<-unique(universe)
197 }
198 }
199 else {
200 universe_gene = NULL
201 }
153 } 202 }
154 203
155 ##enrichGO : GO over-representation test 204 ##enrichGO : GO over-representation test
156 for (onto in ontology) { 205 for (onto in ontology) {
157 if (args$go_represent == "true") { 206 if (args$go_represent == "true") {
158 ggo<-repartition.GO(gene$ENTREZID, orgdb, onto, level, readable=TRUE) 207 ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE)
159 write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) 208 write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
160 } 209 }
161 if (args$go_enrich == "true") { 210 if (args$go_enrich == "true") {
162 ego<-enrich.GO(gene$ENTREZID, orgdb, onto, pval_cutoff, qval_cutoff) 211 ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff)
163 write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) 212 write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
164 } 213 }
165 } 214 }
166 } 215 }
167 216