diff GO-enrich.R @ 0:bd052861852b draft

planemo upload commit ffa3be72b850aecbfbd636de815967c06a8f643f-dirty
author proteore
date Thu, 01 Mar 2018 10:05:18 -0500
parents
children 710414ebb6db
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GO-enrich.R	Thu Mar 01 10:05:18 2018 -0500
@@ -0,0 +1,167 @@
+library(clusterProfiler)
+
+#library(org.Sc.sgd.db)
+library(org.Hs.eg.db)
+library(org.Mm.eg.db)
+
+# Read file and return file content as data.frame?
+readfile = function(filename, header) {
+  if (header == "true") {
+    # Read only the first line of the files as data (without headers):
+    headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE)
+    #Read the data of the files (skipping the first row):
+    file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE)
+    # Remove empty rows
+    file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
+    #And assign the headers of step two to the data:
+    names(file) <- headers
+  }
+  else {
+    file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE)
+    file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
+  }
+  return(file)
+}
+
+repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) {
+  ggo<-groupGO(gene=geneid, 
+               OrgDb = orgdb, 
+               ont=ontology, 
+               level=level, 
+               readable=TRUE)
+  name <- paste("GGO.", ontology, ".png", sep = "")
+  png(name)
+  p <- barplot(ggo)
+  print(p)
+  dev.off()
+  return(ggo)
+}
+
+# GO over-representation test
+enrich.GO <- function(geneid, orgdb, ontology, pval_cutoff, qval_cutoff) {
+  ego<-enrichGO(gene=geneid,
+                OrgDb=orgdb,
+                keytype="ENTREZID",
+                ont=ontology,
+                pAdjustMethod="BH",
+                pvalueCutoff=pval_cutoff,
+                qvalueCutoff=qval_cutoff,
+                readable=TRUE)
+  bar_name <- paste("EGO.", ontology, ".bar.png", sep = "")
+  png(bar_name)
+  p <- barplot(ego)
+  print(p)
+  dev.off()
+  dot_name <- paste("EGO.", ontology, ".dot.png", sep = "")
+  png(dot_name)
+  p <- dotplot(ego)
+  print(p)
+  dev.off()
+  return(ego)
+}
+
+clusterProfiler = function() {
+  args <- commandArgs(TRUE)
+  if(length(args)<1) {
+    args <- c("--help")
+  }
+  
+  # Help section
+  if("--help" %in% args) {
+    cat("clusterProfiler Enrichment Analysis
+    Arguments:
+        --input_type: type of input (list of id or filename)
+        --input: input
+        --ncol: the column number which you would like to apply...
+        --header: true/false if your file contains a header
+        --id_type: the type of input IDs (UniProt/EntrezID)
+        --species
+        --onto_opt: ontology options
+        --go_function: groupGO/enrichGO
+        --level: 1-3
+        --pval_cutoff
+        --qval_cutoff
+        --text_output: text output filename \n")
+    q(save="no")
+  }
+  # Parse arguments
+  parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
+  argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
+  args <- as.list(as.character(argsDF$V2))
+  names(args) <- argsDF$V1
+
+  input_type = args$input_type
+  if (input_type == "text") {
+    input = args$input
+  }
+  else if (input_type == "file") {
+    filename = args$input
+    ncol = args$ncol
+    # Check ncol
+    if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
+      stop("Please enter an integer for level")
+    }
+    else {
+      ncol = as.numeric(gsub("c", "", ncol))
+    }
+    header = args$header
+    # Get file content
+    file = readfile(filename, header)
+    # Extract Protein IDs list
+    input = c()
+    for (row in as.character(file[,ncol])) {
+      input = c(input, strsplit(row, ";")[[1]][1])
+    }
+  }
+  id_type = args$id_type
+
+  
+  #ID format Conversion 
+  #This case : from UNIPROT (protein id) to ENTREZ (gene id)
+  #bitr = conversion function from clusterProfiler
+
+  if (args$species=="human") {
+    orgdb<-org.Hs.eg.db
+  }
+  else if (args$species=="mouse") {
+    orgdb<-org.Mm.eg.db
+  }
+  else if (args$species=="rat") {
+    orgdb<-org.Rn.eg.db
+  }
+  
+  ##to initialize
+  if (id_type=="Uniprot") {
+    idFrom<-"UNIPROT"
+    idTo<-"ENTREZID"
+    gene<-bitr(input, fromType=idFrom, toType=idTo, OrgDb=orgdb)
+  }
+  else if (id_type=="Entrez") {
+    gene<-input
+  }
+
+  ontology <- strsplit(args$onto_opt, ",")[[1]]
+  if (args$go_represent == "true") {
+    go_represent <- args$go_represent
+    level <- as.numeric(args$level)
+  }
+  if (args$go_enrich == "true") {
+    go_enrich <- args$go_enrich
+    pval_cutoff <- as.numeric(args$pval_cutoff)
+    qval_cutoff <- as.numeric(args$qval_cutoff)
+  }
+
+  ##enrichGO : GO over-representation test
+  for (onto in ontology) {
+    if (args$go_represent == "true") {
+      ggo<-repartition.GO(gene$ENTREZID, orgdb, onto, level, readable=TRUE)
+      write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+    }
+    if (args$go_enrich == "true") {
+      ego<-enrich.GO(gene$ENTREZID, orgdb, onto, pval_cutoff, qval_cutoff)
+      write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+    }
+  }
+}
+
+clusterProfiler()