Mercurial > repos > proteore > proteore_goprofiles
diff goprofiles.R @ 8:386145573c19 draft
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
author | proteore |
---|---|
date | Tue, 18 Dec 2018 09:54:57 -0500 |
parents | 3e138d54c105 |
children | 948fecb6a40b |
line wrap: on
line diff
--- a/goprofiles.R Fri Sep 21 10:08:02 2018 -0400 +++ b/goprofiles.R Tue Dec 18 09:54:57 2018 -0500 @@ -4,23 +4,24 @@ suppressMessages(library(goProfiles,quietly = TRUE)) # Read file and return file content as data.frame -readfile = function(filename, header) { - if (header == "true") { - # Read only first line of the file as header: - headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") - #Read the data of the files (skipping the first row) - file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") - # Remove empty rows - file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] - #And assign the header to the data - names(file) <- headers +read_file <- function(path,header){ + file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE) + if (inherits(file,"try-error")){ + stop("File not found !") + }else{ + return(file) } - else { - file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") - # Remove empty rows - file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] +} + +#convert a string to boolean +str2bool <- function(x){ + if (any(is.element(c("t","true"),tolower(x)))){ + return (TRUE) + }else if (any(is.element(c("f","false"),tolower(x)))){ + return (FALSE) + }else{ + return(NULL) } - return(file) } check_ids <- function(vector,type) { @@ -49,10 +50,10 @@ package=org.Hs.eg.db } else if (species=="org.Mm.eg.db"){ package=org.Mm.eg.db + } else if (species=="org.Rn.eg.db"){ + package=org.Rn.eg.db } - - # Check if level is number if (! as.numeric(level) %% 1 == 0) { stop("Please enter an integer for level") @@ -75,8 +76,8 @@ genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] # IDs that have NA ENTREZID NAs = id$UNIPROT[which(is.na(id$ENTREZID))] - print("IDs unable to convert to ENTREZID: ") - print(NAs) + #print("IDs unable to convert to ENTREZID: ") + #print(NAs) } # Create basic profiles @@ -91,77 +92,20 @@ return(c(profile.CC, profile.MF, profile.BP, profile.ALL)) } -# Plot profiles to PNG -plotPNG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { - if (!is.null(profile.CC)) { - png("profile.CC.png") - plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) - dev.off() - } - if (!is.null(profile.BP)) { - png("profile.BP.png") - plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) - dev.off() - } - if (!is.null(profile.MF)) { - png("profile.MF.png") - plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) - dev.off() - } - if (!is.null(profile.ALL)) { - png("profile.ALL.png") - plotProfiles(profile.ALL, percentage=per, multiplePlots=T, aTitle=title) - dev.off() - } -} - -# Plot profiles to JPEG -plotJPEG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { - if (!is.null(profile.CC)) { - jpeg("profile.CC.jpeg") - plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) - dev.off() - } - if (!is.null(profile.BP)) { - jpeg("profile.BP.jpeg") - plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) - dev.off() +make_plot <- function(profile,percent,title,onto,plot_opt){ + + if (plot_opt == "PDF") { + file_name=paste("profile_",onto,".pdf",collapse="",sep="") + pdf(file_name) + } else if (plot_opt == "JPEG"){ + file_name=paste("profile_",onto,".jpeg",collapse="",sep="") + jpeg(file_name) + } else if (plot_opt == "PNG"){ + file_name=paste("profile_",onto,".png",collapse="",sep="") + png(file_name) } - if (!is.null(profile.MF)) { - jpeg("profile.MF.jpeg") - plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) - dev.off() - } - if (!is.null(profile.ALL)) { - jpeg("profile.ALL.jpeg") - plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title) - dev.off() - } -} - -# Plot profiles to PDF -plotPDF = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { - if (!is.null(profile.CC)) { - pdf("profile.CC.pdf") - plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) - dev.off() - } - if (!is.null(profile.BP)) { - pdf("profile.BP.pdf") - plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) - dev.off() - } - if (!is.null(profile.MF)) { - pdf("profile.MF.pdf") - plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) - dev.off() - } - if (!is.null(profile.ALL)) { - #print("all") - pdf("profile.ALL.pdf") - plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title) - dev.off() - } + plotProfiles(profile, percentage=percent, multiplePlots=FALSE, aTitle=title) + dev.off() } goprofiles = function() { @@ -212,9 +156,9 @@ } else { ncol = as.numeric(gsub("c", "", ncol)) } - header = args$header + header = str2bool(args$header) # Get file content - file = readfile(filename, header) + file = read_file(filename, header) # Extract Protein IDs list input = unlist(strsplit(as.character(file[,ncol]),";")) input = input [which(!is.na(input))] @@ -225,8 +169,7 @@ } ontoopt = strsplit(args$onto_opt, ",")[[1]] - #print(ontoopt) - #plotopt = strsplit(args[3], ",") + onto_pos = as.integer(gsub("BP",3,gsub("MF",2,gsub("CC",1,ontoopt)))) plotopt = args$plot_opt level = args$level per = as.logical(args$per) @@ -236,51 +179,15 @@ species=args$species profiles = getprofile(input, id_type, level, duplicate,species) - profile.CC = profiles[1] - #print(profile.CC) - profile.MF = profiles[2] - #print(profile.MF) - profile.BP = profiles[3] - #print(profile.BP) - profile.ALL = profiles[-3:-1] - #print(profile.ALL) - #c(profile.ALL, profile.CC, profile.MF, profile.BP) - - if ("CC" %in% ontoopt) { - write.table(profile.CC, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) - if (grepl("PNG", plotopt)) { - plotPNG(profile.CC=profile.CC, per=per, title=title) - } - if (grepl("JPEG", plotopt)) { - plotJPEG(profile.CC = profile.CC, per=per, title=title) - } - if (grepl("PDF", plotopt)) { - plotPDF(profile.CC = profile.CC, per=per, title=title) - } - } - if ("MF" %in% ontoopt) { - write.table(profile.MF, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) - if (grepl("PNG", plotopt)) { - plotPNG(profile.MF = profile.MF, per=per, title=title) - } - if (grepl("JPEG", plotopt)) { - plotJPEG(profile.MF = profile.MF, per=per, title=title) - } - if (grepl("PDF", plotopt)) { - plotPDF(profile.MF = profile.MF, per=per, title=title) - } - } - if ("BP" %in% ontoopt) { - write.table(profile.BP, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) - if (grepl("PNG", plotopt)) { - plotPNG(profile.BP = profile.BP, per=per, title=title) - } - if (grepl("JPEG", plotopt)) { - plotJPEG(profile.BP = profile.BP, per=per, title=title) - } - if (grepl("PDF", plotopt)) { - plotPDF(profile.BP = profile.BP, per=per, title=title) - } + + for (index in onto_pos) { + onto = names(profiles[index]) + profile=profiles[index] + make_plot(profile,per,title,onto,plotopt) + text_output=paste("goProfiles_",onto,"_",title,".tsv",sep="",collapse="") + profile = as.data.frame(profile) + profile <- as.data.frame(apply(profile, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" and " " to NA + write.table(profile, text_output, sep="\t", row.names = FALSE, quote=FALSE, col.names = T) } }