Mercurial > repos > proteore > proteore_clusterprofiler
comparison GO-enrich.R @ 10:d951677a50d4 draft
planemo upload commit 4ba1ebe7b3f5e3fabf78b5fed7ed0b92e2cbf9e5-dirty
author | proteore |
---|---|
date | Fri, 28 Jun 2019 05:08:48 -0400 |
parents | 2f67202ffdb3 |
children | f6107b8ae8f8 |
comparison
legend: equal, deleted, inserted, replaced
9:2f67202ffdb3 | 10:d951677a50d4 |
---|---|
42 width=1000 | 42 width=1000 |
43 } | 43 } |
44 return (width) | 44 return (width) |
45 } | 45 } |
46 | 46 |
47 repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) { | 47 repartition_GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) { |
48 ggo<-groupGO(gene=geneid, | 48 ggo<-groupGO(gene=geneid, |
49 OrgDb = orgdb, | 49 OrgDb = orgdb, |
50 ont=ontology, | 50 ont=ontology, |
51 level=level, | 51 level=level, |
52 readable=TRUE) | 52 readable=TRUE) |
64 return(ggo) | 64 return(ggo) |
65 } | 65 } |
66 } | 66 } |
67 | 67 |
68 # GO over-representation test | 68 # GO over-representation test |
69 enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) { | 69 enrich_GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) { |
70 ego<-enrichGO(gene=geneid, | 70 ego<-enrichGO(gene=geneid, |
71 universe=universe, | 71 universe=universe, |
72 OrgDb=orgdb, | 72 OrgDb=orgdb, |
73 ont=ontology, | 73 ont=ontology, |
74 pAdjustMethod="BH", | 74 pAdjustMethod="BH", |
105 } else { | 105 } else { |
106 warning(paste("No Go terms enriched (EGO) found for ",ontology,"ontology"),immediate. = TRUE,noBreaks. = TRUE,call. = FALSE) | 106 warning(paste("No Go terms enriched (EGO) found for ",ontology,"ontology"),immediate. = TRUE,noBreaks. = TRUE,call. = FALSE) |
107 } | 107 } |
108 } | 108 } |
109 | 109 |
110 clean_ids <- function(ids){ | |
111 ids = gsub(" ","",ids) | |
112 ids = ids[which(ids!="")] | |
113 ids = ids[which(ids!="NA")] | |
114 ids = ids[!is.na(ids)] | |
115 | |
116 return(ids) | |
117 } | |
118 | |
110 check_ids <- function(vector,type) { | 119 check_ids <- function(vector,type) { |
111 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" | 120 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" |
112 entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" | 121 entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" |
113 if (type == "entrez") | 122 if (type == "entrez") |
114 return(grepl(entrez_id,vector)) | 123 return(grepl(entrez_id,vector)) |
115 else if (type == "uniprot") { | 124 else if (type == "uniprot") { |
116 return(grepl(uniprot_pattern,vector)) | 125 return(grepl(uniprot_pattern,vector)) |
117 } | 126 } |
118 } | 127 } |
119 | 128 |
120 clusterProfiler = function() { | 129 get_args <- function(){ |
121 args <- commandArgs(TRUE) | 130 args <- commandArgs(TRUE) |
122 if(length(args)<1) { | 131 if(length(args)<1) { |
123 args <- c("--help") | 132 args <- c("--help") |
124 } | 133 } |
125 | 134 |
151 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | 160 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") |
152 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | 161 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) |
153 args <- as.list(as.character(argsDF$V2)) | 162 args <- as.list(as.character(argsDF$V2)) |
154 names(args) <- argsDF$V1 | 163 names(args) <- argsDF$V1 |
155 | 164 |
165 return(args) | |
166 } | |
167 | |
168 | |
169 main <- function() { | |
170 | |
171 #get args from command | |
172 args <- get_args() | |
173 | |
156 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda") | 174 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda") |
157 #load("/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda") | 175 #load("/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda") |
158 | |
159 | 176 |
160 go_represent=str2bool(args$go_represent) | 177 go_represent=str2bool(args$go_represent) |
161 go_enrich=str2bool(args$go_enrich) | 178 go_enrich=str2bool(args$go_enrich) |
162 if (go_enrich){ | 179 if (go_enrich){ |
163 plot = unlist(strsplit(args$plot,",")) | 180 plot = unlist(strsplit(args$plot,",")) |
177 # Extract input IDs | 194 # Extract input IDs |
178 input_type = args$input_type | 195 input_type = args$input_type |
179 id_type = args$id_type | 196 id_type = args$id_type |
180 | 197 |
181 if (input_type == "text") { | 198 if (input_type == "text") { |
182 input = strsplit(args$input, "[ \t\n]+")[[1]] | 199 input = unlist(strsplit(strsplit(args$input, "[ \t\n]+")[[1]],";")) |
183 } else if (input_type == "file") { | 200 } else if (input_type == "file") { |
184 filename = args$input | 201 filename = args$input |
185 ncol = args$ncol | 202 ncol = args$ncol |
186 # Check ncol | 203 # Check ncol |
187 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { | 204 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { |
191 } | 208 } |
192 header = str2bool(args$header) # Get file content | 209 header = str2bool(args$header) # Get file content |
193 file = read_file(filename, header) # Extract Protein IDs list | 210 file = read_file(filename, header) # Extract Protein IDs list |
194 input = unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) | 211 input = unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) |
195 } | 212 } |
196 | 213 input = clean_ids(input) |
197 | 214 |
198 ## Get input gene list from input IDs | 215 ## Get input gene list from input IDs |
199 #ID format Conversion | 216 #ID format Conversion |
200 #This case : from UNIPROT (protein id) to ENTREZ (gene id) | 217 #This case : from UNIPROT (protein id) to ENTREZ (gene id) |
201 #bitr = conversion function from clusterProfiler | 218 #bitr = conversion function from clusterProfiler |
220 qval_cutoff <- as.numeric(args$qval_cutoff) | 237 qval_cutoff <- as.numeric(args$qval_cutoff) |
221 # Extract universe background genes (same as input file) | 238 # Extract universe background genes (same as input file) |
222 if (!is.null(args$universe_type)) { | 239 if (!is.null(args$universe_type)) { |
223 universe_type = args$universe_type | 240 universe_type = args$universe_type |
224 if (universe_type == "text") { | 241 if (universe_type == "text") { |
225 universe = strsplit(args$universe, "[ \t\n]+")[[1]] | 242 universe = unlist(strsplit(strsplit(args$input, "[ \t\n]+")[[1]],";")) |
226 } else if (universe_type == "file") { | 243 } else if (universe_type == "file") { |
227 universe_filename = args$universe | 244 universe_filename = args$universe |
228 universe_ncol = args$uncol | 245 universe_ncol = args$uncol |
229 # Check ncol | 246 # Check ncol |
230 if (! as.numeric(gsub("c", "", universe_ncol)) %% 1 == 0) { | 247 if (! as.numeric(gsub("c", "", universe_ncol)) %% 1 == 0) { |
236 # Get file content | 253 # Get file content |
237 universe_file = read_file(universe_filename, universe_header) | 254 universe_file = read_file(universe_filename, universe_header) |
238 # Extract Protein IDs list | 255 # Extract Protein IDs list |
239 universe <- unlist(sapply(universe_file[,universe_ncol], function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) | 256 universe <- unlist(sapply(universe_file[,universe_ncol], function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) |
240 } | 257 } |
258 universe = clean_ids(input) | |
241 universe_id_type = args$universe_id_type | 259 universe_id_type = args$universe_id_type |
242 ##to initialize | 260 ##to initialize |
243 if (universe_id_type=="Uniprot" & any(check_ids(universe,"uniprot"))) { | 261 if (universe_id_type=="Uniprot" & any(check_ids(universe,"uniprot"))) { |
244 idFrom<-"UNIPROT" | 262 idFrom<-"UNIPROT" |
245 idTo<-"ENTREZID" | 263 idTo<-"ENTREZID" |
263 } | 281 } |
264 | 282 |
265 ##enrichGO : GO over-representation test | 283 ##enrichGO : GO over-representation test |
266 for (onto in ontology) { | 284 for (onto in ontology) { |
267 if (go_represent) { | 285 if (go_represent) { |
268 ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE) | 286 ggo<-repartition_GO(gene, orgdb, onto, level, readable=TRUE) |
269 if (is.list(ggo)){ggo <- as.data.frame(apply(ggo, c(1,2), function(x) gsub("^$|^ $", NA, x)))} #convert "" and " " to NA | 287 if (is.list(ggo)){ggo <- as.data.frame(apply(ggo, c(1,2), function(x) gsub("^$|^ $", NA, x)))} #convert "" and " " to NA |
270 output_path = paste("cluster_profiler_GGO_",onto,".tsv",sep="") | 288 output_path = paste("cluster_profiler_GGO_",onto,".tsv",sep="") |
271 write.table(ggo, output_path, sep="\t", row.names = FALSE, quote = FALSE ) | 289 write.table(ggo, output_path, sep="\t", row.names = FALSE, quote = FALSE ) |
272 } | 290 } |
273 | 291 |
274 if (go_enrich) { | 292 if (go_enrich) { |
275 ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot) | 293 ego<-enrich_GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot) |
276 if (is.list(ego)){ego <- as.data.frame(apply(ego, c(1,2), function(x) gsub("^$|^ $", NA, x)))} #convert "" and " " to NA | 294 if (is.list(ego)){ego <- as.data.frame(apply(ego, c(1,2), function(x) gsub("^$|^ $", NA, x)))} #convert "" and " " to NA |
277 output_path = paste("cluster_profiler_EGO_",onto,".tsv",sep="") | 295 output_path = paste("cluster_profiler_EGO_",onto,".tsv",sep="") |
278 write.table(ego, output_path, sep="\t", row.names = FALSE, quote = FALSE ) | 296 write.table(ego, output_path, sep="\t", row.names = FALSE, quote = FALSE ) |
279 } | 297 } |
280 } | 298 } |
281 } | 299 } |
282 | 300 |
283 clusterProfiler() | 301 if(!interactive()) { |
302 main() | |
303 } |