Mercurial > repos > proteore > proteore_clusterprofiler
diff GO-enrich.R @ 0:bd052861852b draft
planemo upload commit ffa3be72b850aecbfbd636de815967c06a8f643f-dirty
author | proteore |
---|---|
date | Thu, 01 Mar 2018 10:05:18 -0500 |
parents | |
children | 710414ebb6db |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GO-enrich.R Thu Mar 01 10:05:18 2018 -0500 @@ -0,0 +1,167 @@ +library(clusterProfiler) + +#library(org.Sc.sgd.db) +library(org.Hs.eg.db) +library(org.Mm.eg.db) + +# Read file and return file content as data.frame? +readfile = function(filename, header) { + if (header == "true") { + # Read only the first line of the files as data (without headers): + headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) + #Read the data of the files (skipping the first row): + file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) + # Remove empty rows + file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] + #And assign the headers of step two to the data: + names(file) <- headers + } + else { + file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) + file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] + } + return(file) +} + +repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) { + ggo<-groupGO(gene=geneid, + OrgDb = orgdb, + ont=ontology, + level=level, + readable=TRUE) + name <- paste("GGO.", ontology, ".png", sep = "") + png(name) + p <- barplot(ggo) + print(p) + dev.off() + return(ggo) +} + +# GO over-representation test +enrich.GO <- function(geneid, orgdb, ontology, pval_cutoff, qval_cutoff) { + ego<-enrichGO(gene=geneid, + OrgDb=orgdb, + keytype="ENTREZID", + ont=ontology, + pAdjustMethod="BH", + pvalueCutoff=pval_cutoff, + qvalueCutoff=qval_cutoff, + readable=TRUE) + bar_name <- paste("EGO.", ontology, ".bar.png", sep = "") + png(bar_name) + p <- barplot(ego) + print(p) + dev.off() + dot_name <- paste("EGO.", ontology, ".dot.png", sep = "") + png(dot_name) + p <- dotplot(ego) + print(p) + dev.off() + return(ego) +} + +clusterProfiler = function() { + args <- commandArgs(TRUE) + if(length(args)<1) { + args <- c("--help") + } + + # Help section + if("--help" %in% args) { + cat("clusterProfiler Enrichment Analysis + Arguments: + --input_type: type of input (list of id or filename) + --input: input + --ncol: the column number which you would like to apply... + --header: true/false if your file contains a header + --id_type: the type of input IDs (UniProt/EntrezID) + --species + --onto_opt: ontology options + --go_function: groupGO/enrichGO + --level: 1-3 + --pval_cutoff + --qval_cutoff + --text_output: text output filename \n") + q(save="no") + } + # Parse arguments + parseArgs <- function(x) strsplit(sub("^--", "", x), "=") + argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) + args <- as.list(as.character(argsDF$V2)) + names(args) <- argsDF$V1 + + input_type = args$input_type + if (input_type == "text") { + input = args$input + } + else if (input_type == "file") { + filename = args$input + ncol = args$ncol + # Check ncol + if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { + stop("Please enter an integer for level") + } + else { + ncol = as.numeric(gsub("c", "", ncol)) + } + header = args$header + # Get file content + file = readfile(filename, header) + # Extract Protein IDs list + input = c() + for (row in as.character(file[,ncol])) { + input = c(input, strsplit(row, ";")[[1]][1]) + } + } + id_type = args$id_type + + + #ID format Conversion + #This case : from UNIPROT (protein id) to ENTREZ (gene id) + #bitr = conversion function from clusterProfiler + + if (args$species=="human") { + orgdb<-org.Hs.eg.db + } + else if (args$species=="mouse") { + orgdb<-org.Mm.eg.db + } + else if (args$species=="rat") { + orgdb<-org.Rn.eg.db + } + + ##to initialize + if (id_type=="Uniprot") { + idFrom<-"UNIPROT" + idTo<-"ENTREZID" + gene<-bitr(input, fromType=idFrom, toType=idTo, OrgDb=orgdb) + } + else if (id_type=="Entrez") { + gene<-input + } + + ontology <- strsplit(args$onto_opt, ",")[[1]] + if (args$go_represent == "true") { + go_represent <- args$go_represent + level <- as.numeric(args$level) + } + if (args$go_enrich == "true") { + go_enrich <- args$go_enrich + pval_cutoff <- as.numeric(args$pval_cutoff) + qval_cutoff <- as.numeric(args$qval_cutoff) + } + + ##enrichGO : GO over-representation test + for (onto in ontology) { + if (args$go_represent == "true") { + ggo<-repartition.GO(gene$ENTREZID, orgdb, onto, level, readable=TRUE) + write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) + } + if (args$go_enrich == "true") { + ego<-enrich.GO(gene$ENTREZID, orgdb, onto, pval_cutoff, qval_cutoff) + write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) + } + } +} + +clusterProfiler()