view goprofiles.R @ 4:715002a394ec draft

planemo upload commit b36435833bf54f90f62cc240f2cda1c889161b23-dirty
author proteore
date Fri, 23 Mar 2018 10:47:17 -0400
parents 58a8ddd58dde
children 781072a65600
line wrap: on
line source

# Load necessary libraries
library(org.Hs.eg.db)
library(goProfiles)

# Read file and return file content as data.frame
readfile = function(filename, header) {
  if (header == "true") {
    # Read only first line of the file as header:
    headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
    #Read the data of the files (skipping the first row)
    file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
    # Remove empty rows
    file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
    #And assign the header to the data
    names(file) <- headers
  }
  else {
    file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
    # Remove empty rows
    file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
  }
  return(file)
}

getprofile = function(ids, id_type, level, duplicate) {
  ####################################################################
  # Arguments
  #   - ids: list of input IDs
  #   - id_type: type of input IDs (UniProt/ENTREZID)
  #   - level
  #   - duplicate: if the duplicated IDs should be removed or not (TRUE/FALSE)
  ####################################################################
  
  # Check if level is number
  if (! as.numeric(level) %% 1 == 0) {
    stop("Please enter an integer for level")
  }
  else {
    level = as.numeric(level)
  }
  #genes = as.vector(file[,ncol])
  
  # Extract Gene Entrez ID
  if (id_type == "Entrez") {
    id = select(org.Hs.eg.db, ids, "ENTREZID", multiVals = "first")
    genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
  }
  else {
    genes_ids = c()
    id = select(org.Hs.eg.db, ids, "ENTREZID", "UNIPROT", multiVals = "first")
    if (duplicate == "TRUE") {
      id = unique(id)
    }
    print(id[[1]])
    genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
    # IDs that have NA ENTREZID
    NAs = id$UNIPROT[which(is.na(id$ENTREZID))]
    print("IDs unable to convert to ENTREZID: ")
    print(NAs)
  }
  
  # Create basic profiles
  profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
  profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
  profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
  profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
  
  # Print profile
  # printProfiles(profile)
  
  return(c(profile.CC, profile.MF, profile.BP, profile.ALL))
}

# Plot profiles to PNG
plotPNG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
  if (!is.null(profile.CC)) {
    png("profile.CC.png")
    plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
    dev.off()
  }
  if (!is.null(profile.BP)) {
    png("profile.BP.png")
    plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
    dev.off()
  }
  if (!is.null(profile.MF)) {
    png("profile.MF.png")
    plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
    dev.off()
  }
  if (!is.null(profile.ALL)) {
    png("profile.ALL.png")
    plotProfiles(profile.ALL, percentage=per, multiplePlots=T, aTitle=title)
    dev.off()
  }
}

# Plot profiles to JPEG
plotJPEG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
  if (!is.null(profile.CC)) {
    jpeg("profile.CC.jpeg")
    plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
    dev.off()
  }
  if (!is.null(profile.BP)) {
    jpeg("profile.BP.jpeg")
    plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
    dev.off()
  }
  if (!is.null(profile.MF)) {
    jpeg("profile.MF.jpeg")
    plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
    dev.off()
  }
  if (!is.null(profile.ALL)) {
    jpeg("profile.ALL.jpeg")
    plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
    dev.off()
  }
}

# Plot profiles to PDF
plotPDF = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
  if (!is.null(profile.CC)) {
    pdf("profile.CC.pdf")
    plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
    dev.off()
  }
  if (!is.null(profile.BP)) {
    pdf("profile.BP.pdf")
    plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
    dev.off()
  }
  if (!is.null(profile.MF)) {
    pdf("profile.MF.pdf")
    plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
    dev.off()
  }
  if (!is.null(profile.ALL)) {
    #print("all")
    pdf("profile.ALL.pdf")
    plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
    dev.off()
  }
}

goprofiles = function() {
  args <- commandArgs(TRUE)
  if(length(args)<1) {
    args <- c("--help")
  }
  
  # Help section
  if("--help" %in% args) {
    cat("Selection and Annotation HPA
    Arguments:
        --input_type: type of input (list of id or filename)
        --input: input
        --ncol: the column number which you would like to apply...
        --header: true/false if your file contains a header
        --id_type: the type of input IDs (UniProt/EntrezID)
        --onto_opt: ontology options
        --plot_opt: plot extension options (PDF/JPEG/PNG)
        --level: 1-3
        --per
        --title: title of the plot
        --duplicate: remove dupliate input IDs (true/false)
        --text_output: text output filename \n")
    q(save="no")
  }
  
  # Parse arguments
  parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
  argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
  args <- as.list(as.character(argsDF$V2))
  names(args) <- argsDF$V1

  input_type = args$input_type
  if (input_type == "text") {
    input = strsplit(args$input, "[ \t\n]+")[[1]]
  }
  else if (input_type == "file") {
    filename = args$input
    ncol = args$ncol
    # Check ncol
    if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
      stop("Please enter an integer for level")
    }
    else {
      ncol = as.numeric(gsub("c", "", ncol))
    }
    header = args$header
    # Get file content
    file = readfile(filename, header)
    # Extract Protein IDs list
    input = c()
    for (row in as.character(file[,ncol])) {
      input = c(input, strsplit(row, ";")[[1]][1])
    }
  }
  id_type = args$id_type
  ontoopt = strsplit(args$onto_opt, ",")[[1]]
  #print(ontoopt)
  #plotopt = strsplit(args[3], ",")
  plotopt = args$plot_opt
  level = args$level
  per = as.logical(args$per)
  title = args$title
  duplicate = args$duplicate
  text_output = args$text_output

  profiles = getprofile(input, id_type, level, duplicate)
  profile.CC = profiles[1]
  #print(profile.CC)
  profile.MF = profiles[2]
  #print(profile.MF)
  profile.BP = profiles[3]
  #print(profile.BP)
  profile.ALL = profiles[-3:-1]
  #print(profile.ALL)
  #c(profile.ALL, profile.CC, profile.MF, profile.BP)
    
  if ("CC" %in% ontoopt) {
    write.table(profile.CC, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
    if (grepl("PNG", plotopt)) {
      plotPNG(profile.CC=profile.CC, per=per, title=title)
    }
    if (grepl("JPEG", plotopt)) {
      plotJPEG(profile.CC = profile.CC, per=per, title=title)
    }
    if (grepl("PDF", plotopt)) {
      plotPDF(profile.CC = profile.CC, per=per, title=title)
    }
  }
  if ("MF" %in% ontoopt) {
    write.table(profile.MF, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
    if (grepl("PNG", plotopt)) {
      plotPNG(profile.MF = profile.MF, per=per, title=title)
    }
    if (grepl("JPEG", plotopt)) {
      plotJPEG(profile.MF = profile.MF, per=per, title=title)
    }
    if (grepl("PDF", plotopt)) {
      plotPDF(profile.MF = profile.MF, per=per, title=title)
    }
  }
  if ("BP" %in% ontoopt) {
    write.table(profile.BP, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
    if (grepl("PNG", plotopt)) {
      plotPNG(profile.BP = profile.BP, per=per, title=title)
    }
    if (grepl("JPEG", plotopt)) {
      plotJPEG(profile.BP = profile.BP, per=per, title=title)
    }
    if (grepl("PDF", plotopt)) {
      plotPDF(profile.BP = profile.BP, per=per, title=title)
    }
  }
}

goprofiles()