view GO-enrich.R @ 0:bd052861852b draft

planemo upload commit ffa3be72b850aecbfbd636de815967c06a8f643f-dirty
author proteore
date Thu, 01 Mar 2018 10:05:18 -0500
parents
children 710414ebb6db
line wrap: on
line source

library(clusterProfiler)

#library(org.Sc.sgd.db)
library(org.Hs.eg.db)
library(org.Mm.eg.db)

# Read file and return file content as data.frame?
readfile = function(filename, header) {
  if (header == "true") {
    # Read only the first line of the files as data (without headers):
    headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE)
    #Read the data of the files (skipping the first row):
    file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE)
    # Remove empty rows
    file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
    #And assign the headers of step two to the data:
    names(file) <- headers
  }
  else {
    file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE)
    file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
  }
  return(file)
}

repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) {
  ggo<-groupGO(gene=geneid, 
               OrgDb = orgdb, 
               ont=ontology, 
               level=level, 
               readable=TRUE)
  name <- paste("GGO.", ontology, ".png", sep = "")
  png(name)
  p <- barplot(ggo)
  print(p)
  dev.off()
  return(ggo)
}

# GO over-representation test
enrich.GO <- function(geneid, orgdb, ontology, pval_cutoff, qval_cutoff) {
  ego<-enrichGO(gene=geneid,
                OrgDb=orgdb,
                keytype="ENTREZID",
                ont=ontology,
                pAdjustMethod="BH",
                pvalueCutoff=pval_cutoff,
                qvalueCutoff=qval_cutoff,
                readable=TRUE)
  bar_name <- paste("EGO.", ontology, ".bar.png", sep = "")
  png(bar_name)
  p <- barplot(ego)
  print(p)
  dev.off()
  dot_name <- paste("EGO.", ontology, ".dot.png", sep = "")
  png(dot_name)
  p <- dotplot(ego)
  print(p)
  dev.off()
  return(ego)
}

clusterProfiler = function() {
  args <- commandArgs(TRUE)
  if(length(args)<1) {
    args <- c("--help")
  }
  
  # Help section
  if("--help" %in% args) {
    cat("clusterProfiler Enrichment Analysis
    Arguments:
        --input_type: type of input (list of id or filename)
        --input: input
        --ncol: the column number which you would like to apply...
        --header: true/false if your file contains a header
        --id_type: the type of input IDs (UniProt/EntrezID)
        --species
        --onto_opt: ontology options
        --go_function: groupGO/enrichGO
        --level: 1-3
        --pval_cutoff
        --qval_cutoff
        --text_output: text output filename \n")
    q(save="no")
  }
  # Parse arguments
  parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
  argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
  args <- as.list(as.character(argsDF$V2))
  names(args) <- argsDF$V1

  input_type = args$input_type
  if (input_type == "text") {
    input = args$input
  }
  else if (input_type == "file") {
    filename = args$input
    ncol = args$ncol
    # Check ncol
    if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
      stop("Please enter an integer for level")
    }
    else {
      ncol = as.numeric(gsub("c", "", ncol))
    }
    header = args$header
    # Get file content
    file = readfile(filename, header)
    # Extract Protein IDs list
    input = c()
    for (row in as.character(file[,ncol])) {
      input = c(input, strsplit(row, ";")[[1]][1])
    }
  }
  id_type = args$id_type

  
  #ID format Conversion 
  #This case : from UNIPROT (protein id) to ENTREZ (gene id)
  #bitr = conversion function from clusterProfiler

  if (args$species=="human") {
    orgdb<-org.Hs.eg.db
  }
  else if (args$species=="mouse") {
    orgdb<-org.Mm.eg.db
  }
  else if (args$species=="rat") {
    orgdb<-org.Rn.eg.db
  }
  
  ##to initialize
  if (id_type=="Uniprot") {
    idFrom<-"UNIPROT"
    idTo<-"ENTREZID"
    gene<-bitr(input, fromType=idFrom, toType=idTo, OrgDb=orgdb)
  }
  else if (id_type=="Entrez") {
    gene<-input
  }

  ontology <- strsplit(args$onto_opt, ",")[[1]]
  if (args$go_represent == "true") {
    go_represent <- args$go_represent
    level <- as.numeric(args$level)
  }
  if (args$go_enrich == "true") {
    go_enrich <- args$go_enrich
    pval_cutoff <- as.numeric(args$pval_cutoff)
    qval_cutoff <- as.numeric(args$qval_cutoff)
  }

  ##enrichGO : GO over-representation test
  for (onto in ontology) {
    if (args$go_represent == "true") {
      ggo<-repartition.GO(gene$ENTREZID, orgdb, onto, level, readable=TRUE)
      write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
    }
    if (args$go_enrich == "true") {
      ego<-enrich.GO(gene$ENTREZID, orgdb, onto, pval_cutoff, qval_cutoff)
      write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
    }
  }
}

clusterProfiler()