diff goprofiles.R @ 8:386145573c19 draft

planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
author proteore
date Tue, 18 Dec 2018 09:54:57 -0500
parents 3e138d54c105
children 948fecb6a40b
line wrap: on
line diff
--- a/goprofiles.R	Fri Sep 21 10:08:02 2018 -0400
+++ b/goprofiles.R	Tue Dec 18 09:54:57 2018 -0500
@@ -4,23 +4,24 @@
 suppressMessages(library(goProfiles,quietly = TRUE))
 
 # Read file and return file content as data.frame
-readfile = function(filename, header) {
-  if (header == "true") {
-    # Read only first line of the file as header:
-    headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
-    #Read the data of the files (skipping the first row)
-    file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
-    # Remove empty rows
-    file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
-    #And assign the header to the data
-    names(file) <- headers
+read_file <- function(path,header){
+  file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE)
+  if (inherits(file,"try-error")){
+    stop("File not found !")
+  }else{
+    return(file)
   }
-  else {
-    file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
-    # Remove empty rows
-    file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
+}
+
+#convert a string to boolean
+str2bool <- function(x){
+  if (any(is.element(c("t","true"),tolower(x)))){
+    return (TRUE)
+  }else if (any(is.element(c("f","false"),tolower(x)))){
+    return (FALSE)
+  }else{
+    return(NULL)
   }
-  return(file)
 }
 
 check_ids <- function(vector,type) {
@@ -49,10 +50,10 @@
     package=org.Hs.eg.db
   } else if (species=="org.Mm.eg.db"){
     package=org.Mm.eg.db
+  } else if (species=="org.Rn.eg.db"){
+    package=org.Rn.eg.db
   }
   
-  
-  
   # Check if level is number
   if (! as.numeric(level) %% 1 == 0) {
     stop("Please enter an integer for level")
@@ -75,8 +76,8 @@
     genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
     # IDs that have NA ENTREZID
     NAs = id$UNIPROT[which(is.na(id$ENTREZID))]
-    print("IDs unable to convert to ENTREZID: ")
-    print(NAs)
+    #print("IDs unable to convert to ENTREZID: ")
+    #print(NAs)
   }
   
   # Create basic profiles
@@ -91,77 +92,20 @@
   return(c(profile.CC, profile.MF, profile.BP, profile.ALL))
 }
 
-# Plot profiles to PNG
-plotPNG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
-  if (!is.null(profile.CC)) {
-    png("profile.CC.png")
-    plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
-    dev.off()
-  }
-  if (!is.null(profile.BP)) {
-    png("profile.BP.png")
-    plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
-    dev.off()
-  }
-  if (!is.null(profile.MF)) {
-    png("profile.MF.png")
-    plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
-    dev.off()
-  }
-  if (!is.null(profile.ALL)) {
-    png("profile.ALL.png")
-    plotProfiles(profile.ALL, percentage=per, multiplePlots=T, aTitle=title)
-    dev.off()
-  }
-}
-
-# Plot profiles to JPEG
-plotJPEG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
-  if (!is.null(profile.CC)) {
-    jpeg("profile.CC.jpeg")
-    plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
-    dev.off()
-  }
-  if (!is.null(profile.BP)) {
-    jpeg("profile.BP.jpeg")
-    plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
-    dev.off()
+make_plot <- function(profile,percent,title,onto,plot_opt){
+  
+  if (plot_opt == "PDF") {
+    file_name=paste("profile_",onto,".pdf",collapse="",sep="")
+    pdf(file_name)
+  } else if (plot_opt == "JPEG"){
+    file_name=paste("profile_",onto,".jpeg",collapse="",sep="")
+    jpeg(file_name)
+  } else if (plot_opt == "PNG"){
+    file_name=paste("profile_",onto,".png",collapse="",sep="")
+    png(file_name)
   }
-  if (!is.null(profile.MF)) {
-    jpeg("profile.MF.jpeg")
-    plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
-    dev.off()
-  }
-  if (!is.null(profile.ALL)) {
-    jpeg("profile.ALL.jpeg")
-    plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
-    dev.off()
-  }
-}
-
-# Plot profiles to PDF
-plotPDF = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
-  if (!is.null(profile.CC)) {
-    pdf("profile.CC.pdf")
-    plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
-    dev.off()
-  }
-  if (!is.null(profile.BP)) {
-    pdf("profile.BP.pdf")
-    plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
-    dev.off()
-  }
-  if (!is.null(profile.MF)) {
-    pdf("profile.MF.pdf")
-    plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
-    dev.off()
-  }
-  if (!is.null(profile.ALL)) {
-    #print("all")
-    pdf("profile.ALL.pdf")
-    plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
-    dev.off()
-  }
+  plotProfiles(profile, percentage=percent, multiplePlots=FALSE, aTitle=title)
+  dev.off()
 }
 
 goprofiles = function() {
@@ -212,9 +156,9 @@
     } else {
       ncol = as.numeric(gsub("c", "", ncol))
     }
-    header = args$header
+    header = str2bool(args$header)
     # Get file content
-    file = readfile(filename, header)
+    file = read_file(filename, header)
     # Extract Protein IDs list
     input = unlist(strsplit(as.character(file[,ncol]),";"))
     input = input [which(!is.na(input))]
@@ -225,8 +169,7 @@
   }
   
   ontoopt = strsplit(args$onto_opt, ",")[[1]]
-  #print(ontoopt)
-  #plotopt = strsplit(args[3], ",")
+  onto_pos = as.integer(gsub("BP",3,gsub("MF",2,gsub("CC",1,ontoopt))))
   plotopt = args$plot_opt
   level = args$level
   per = as.logical(args$per)
@@ -236,51 +179,15 @@
   species=args$species
 
   profiles = getprofile(input, id_type, level, duplicate,species)
-  profile.CC = profiles[1]
-  #print(profile.CC)
-  profile.MF = profiles[2]
-  #print(profile.MF)
-  profile.BP = profiles[3]
-  #print(profile.BP)
-  profile.ALL = profiles[-3:-1]
-  #print(profile.ALL)
-  #c(profile.ALL, profile.CC, profile.MF, profile.BP)
-    
-  if ("CC" %in% ontoopt) {
-    write.table(profile.CC, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
-    if (grepl("PNG", plotopt)) {
-      plotPNG(profile.CC=profile.CC, per=per, title=title)
-    }
-    if (grepl("JPEG", plotopt)) {
-      plotJPEG(profile.CC = profile.CC, per=per, title=title)
-    }
-    if (grepl("PDF", plotopt)) {
-      plotPDF(profile.CC = profile.CC, per=per, title=title)
-    }
-  }
-  if ("MF" %in% ontoopt) {
-    write.table(profile.MF, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
-    if (grepl("PNG", plotopt)) {
-      plotPNG(profile.MF = profile.MF, per=per, title=title)
-    }
-    if (grepl("JPEG", plotopt)) {
-      plotJPEG(profile.MF = profile.MF, per=per, title=title)
-    }
-    if (grepl("PDF", plotopt)) {
-      plotPDF(profile.MF = profile.MF, per=per, title=title)
-    }
-  }
-  if ("BP" %in% ontoopt) {
-    write.table(profile.BP, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
-    if (grepl("PNG", plotopt)) {
-      plotPNG(profile.BP = profile.BP, per=per, title=title)
-    }
-    if (grepl("JPEG", plotopt)) {
-      plotJPEG(profile.BP = profile.BP, per=per, title=title)
-    }
-    if (grepl("PDF", plotopt)) {
-      plotPDF(profile.BP = profile.BP, per=per, title=title)
-    }
+
+  for (index in onto_pos) {
+    onto = names(profiles[index])
+    profile=profiles[index]
+    make_plot(profile,per,title,onto,plotopt)
+    text_output=paste("goProfiles_",onto,"_",title,".tsv",sep="",collapse="")
+    profile = as.data.frame(profile)
+    profile <- as.data.frame(apply(profile, c(1,2), function(x) gsub("^$|^ $", NA, x)))  #convert "" and " " to NA
+    write.table(profile, text_output, sep="\t", row.names = FALSE, quote=FALSE, col.names = T)
   }
 }