Mercurial > repos > proteore > proteore_goprofiles
diff goprofiles.R @ 15:601027649251 draft default tip
"planemo upload commit 6bc2c32177acd823d5025bc90a562e6e6b2a233a"
author | proteore |
---|---|
date | Tue, 06 Apr 2021 16:59:30 +0000 |
parents | 3ddc1f78773d |
children |
line wrap: on
line diff
--- a/goprofiles.R Fri Jan 24 05:10:14 2020 -0500 +++ b/goprofiles.R Tue Apr 06 16:59:30 2021 +0000 @@ -1,12 +1,14 @@ -options(warn=-1) #TURN OFF WARNINGS !!!!!! +options(warn = -1) #TURN OFF WARNINGS !!!!!! # Load necessary libraries -suppressMessages(library(goProfiles,quietly = TRUE)) +suppressMessages(library(goProfiles, quietly = TRUE)) # Read file and return file content as data.frame -read_file <- function(path,header){ - file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE) - if (inherits(file,"try-error")){ +read_file <- function(path, header) { + file <- try(read.csv(path, header = header, + sep = "\t", stringsAsFactors = FALSE, + quote = "\"", check.names = F), silent = TRUE) + if (inherits(file, "try-error")) { stop("File not found !") }else{ return(file) @@ -14,27 +16,30 @@ } #convert a string to boolean -str2bool <- function(x){ - if (any(is.element(c("t","true"),tolower(x)))){ - return (TRUE) - }else if (any(is.element(c("f","false"),tolower(x)))){ - return (FALSE) +str2bool <- function(x) { + if (any(is.element(c("t", "true"), tolower(x)))) { + return(TRUE) + }else if (any(is.element(c("f", "false"), tolower(x)))) { + return(FALSE) }else{ return(NULL) } } -check_ids <- function(vector,type) { - uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" - entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" - if (type == "Entrez"){ - return(grepl(entrez_id,vector)) +check_ids <- function(vector, type) { + # nolint start + uniprot_pattern <- + "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" + entrez_id <- "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" + # nolint end + if (type == "Entrez") { + return(grepl(entrez_id, vector)) } else if (type == "UniProt") { - return(grepl(uniprot_pattern,vector)) + return(grepl(uniprot_pattern, vector)) } } -getprofile = function(ids, id_type, level, duplicate,species) { +getprofile <- function(ids, id_type, level, duplicate, species) { #################################################################### # Arguments # - ids: list of input IDs @@ -43,89 +48,100 @@ # - duplicate: if the duplicated IDs should be removed or not (TRUE/FALSE) # - species #################################################################### - + library(species, character.only = TRUE, quietly = TRUE) - - if (species=="org.Hs.eg.db"){ - package=org.Hs.eg.db - } else if (species=="org.Mm.eg.db"){ - package=org.Mm.eg.db - } else if (species=="org.Rn.eg.db"){ - package=org.Rn.eg.db + + if (species == "org.Hs.eg.db") { + package <- org.Hs.eg.db #nolint + } else if (species == "org.Mm.eg.db") { + package <- org.Mm.eg.db #nolint + } else if (species == "org.Rn.eg.db") { + package <- org.Rn.eg.db #nolint } - + # Check if level is number if (! as.numeric(level) %% 1 == 0) { stop("Please enter an integer for level") } else { - level = as.numeric(level) + level <- as.numeric(level) } - #genes = as.vector(file[,ncol]) - + + # Extract Gene Entrez ID if (id_type == "Entrez") { - id = select(package, ids, "ENTREZID", multiVals = "first") + id <- select(package, ids, "ENTREZID", multiVals = "first") #nolint } else { - id = select(package, ids, "ENTREZID", "UNIPROT", multiVals = "first") + id <- select(package, ids, "ENTREZID", "UNIPROT", multiVals = "first") #nolint + } + if (duplicate) { + id <- unique(id) } - if (duplicate) { id = unique(id) } - genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] - NAs = id$UNIPROT[which(is.na(id$ENTREZID))] # IDs that have NA ENTREZID - + genes_ids <- id$ENTREZID[which(!is.na(id$ENTREZID))] + nas <- id$UNIPROT[which(is.na(id$ENTREZID))] # IDs that have NA ENTREZID #nolint + # Create basic profiles - profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) - profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) - profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) - profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) - # Print profile - # printProfiles(profile) - + profile.CC <- + basicProfile(genes_ids, onto = "CC", level = level, + orgPackage = species, empty.cats = F, ord = T, na.rm = T) + profile.BP <- + basicProfile(genes_ids, onto = "BP", level = level, + orgPackage = species, empty.cats = F, ord = T, na.rm = T) + profile.MF <- + basicProfile(genes_ids, onto = "MF", level = level, + orgPackage = species, empty.cats = F, ord = T, na.rm = T) + profile.ALL <- + basicProfile(genes_ids, onto = "ANY", level = level, + orgPackage = species, empty.cats = F, ord = T, na.rm = T) + return(c(profile.CC, profile.MF, profile.BP, profile.ALL)) } #return height and width of plot in inches from profile -plot_size_from_nb_onto <- function(profile){ - width=10 - range = seq(25, 2000, by=25) - names(range) = seq(5,242, by=3) - nb_onto = round(nrow(profile[[1]])/25)*25 - if (nb_onto < 25) {nb_onto = 25} - if (nb_onto <= 2000) { - height= as.integer(names(which(range==nb_onto))) - } else { - height=250 - } - return (c(width,height)) +plot_size_from_nb_onto <- function(profile) { + width <- 10 + range <- seq(25, 2000, by = 25) + names(range) <- seq(5, 242, by = 3) + nb_onto <- round(nrow(profile[[1]]) / 25) * 25 + if (nb_onto < 25) { + nb_onto <- 25 + } + if (nb_onto <= 2000) { + height <- as.integer(names(which(range == nb_onto))) + } else { + height <- 250 + } + return(c(width, height)) } -make_plot <- function(profile,percent,title,onto,plot_opt){ - - tmp <- plot_size_from_nb_onto (profile) +make_plot <- function(profile, percent, title, onto, plot_opt) { + + tmp <- plot_size_from_nb_onto(profile) width <- tmp[1] height <- tmp[2] - + if (plot_opt == "PDF") { - file_name=paste("profile_",onto,".pdf",collapse="",sep="") - pdf(file_name, width=width, heigh=height) - } else if (plot_opt == "JPEG"){ - file_name=paste("profile_",onto,".jpeg",collapse="",sep="") - jpeg(file_name,width=width, height=height, units = "in", res=100) - } else if (plot_opt == "PNG"){ - file_name=paste("profile_",onto,".png",collapse="",sep="") - png(file_name,width=width, height=height, units = "in", res=100) + file_name <- paste("profile_", onto, ".pdf", collapse = "", sep = "") + pdf(file_name, width = width, height = height) + } else if (plot_opt == "JPEG") { + file_name <- paste("profile_", onto, ".jpeg", collapse = "", sep = "") + jpeg(file_name, width = width, height = height, units = "in", res = 100) + } else if (plot_opt == "PNG") { + file_name <- paste("profile_", onto, ".png", collapse = "", sep = "") + png(file_name, width = width, height = height, units = "in", res = 100) } - plotProfiles(profile, percentage=percent, multiplePlots=FALSE, aTitle=title) + plotProfiles(profile, percentage = percent, + multiplePlots = FALSE, aTitle = title) dev.off() } -goprofiles = function() { +goprofiles <- function() { args <- commandArgs(TRUE) - if(length(args)<1) { + if (length(args) < 1) { args <- c("--help") } - + # Help section - if("--help" %in% args) { + if ("--help" %in% args) { cat("Selection and Annotation HPA Arguments: --input_type: type of input (list of id or filename) @@ -141,63 +157,66 @@ --duplicate: remove dupliate input IDs (true/false) --text_output: text output filename \n --species") - q(save="no") + q(save = "no") } - + # Parse arguments - parseArgs <- function(x) strsplit(sub("^--", "", x), "=") - argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) - args <- as.list(as.character(argsDF$V2)) - names(args) <- argsDF$V1 + parse_args <- function(x) strsplit(sub("^--", "", x), "=") + args_df <- as.data.frame(do.call("rbind", parse_args(args))) + args <- as.list(as.character(args_df$V2)) + names(args) <- args_df$V1 - #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/goprofiles/args.Rda") - #load("/home/dchristiany/proteore_project/ProteoRE/tools/goprofiles/args.Rda") - - id_type = args$id_type - input_type = args$input_type + + id_type <- args$id_type + input_type <- args$input_type if (input_type == "text") { - input = unlist(strsplit(strsplit(args$input, "[ \t\n]+")[[1]],";")) + input <- unlist(strsplit(strsplit(args$input, "[ \t\n]+")[[1]], ";")) } else if (input_type == "file") { - filename = args$input - ncol = args$ncol + filename <- args$input + ncol <- args$ncol # Check ncol if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { stop("Please enter an integer for level") } else { - ncol = as.numeric(gsub("c", "", ncol)) + ncol <- as.numeric(gsub("c", "", ncol)) } - header = str2bool(args$header) + header <- str2bool(args$header) # Get file content - file = read_file(filename, header) + file <- read_file(filename, header) # Extract Protein IDs list - input = unlist(strsplit(as.character(file[,ncol]),";")) + input <- unlist(strsplit(as.character(file[, ncol]), ";")) } - input = input [which(!is.na(gsub("NA",NA,input)))] - - if (! any(check_ids(input,id_type))){ - stop(paste(id_type,"not found in your ids list, please check your IDs in input or the selected column of your input file")) + input <- input [which(!is.na(gsub("NA", NA, input)))] + + if (! any(check_ids(input, id_type))) { + stop(paste(id_type, + "not found in your ids list, please check your IDs in input + or the selected column of your input file")) } - - ontoopt = strsplit(args$onto_opt, ",")[[1]] - onto_pos = as.integer(gsub("BP",3,gsub("MF",2,gsub("CC",1,ontoopt)))) - plotopt = args$plot_opt - level = args$level - per = as.logical(args$per) - title = args$title - duplicate = str2bool(args$duplicate) - text_output = args$text_output - species=args$species + + ontoopt <- strsplit(args$onto_opt, ",")[[1]] + onto_pos <- as.integer(gsub("BP", 3, gsub("MF", 2, gsub("CC", 1, ontoopt)))) + plotopt <- args$plot_opt + level <- args$level + per <- as.logical(args$per) + title <- args$title + duplicate <- str2bool(args$duplicate) + text_output <- args$text_output + species <- args$species - profiles = getprofile(input, id_type, level, duplicate,species) - + profiles <- getprofile(input, id_type, level, duplicate, species) + for (index in onto_pos) { - onto = names(profiles[index]) - profile=profiles[index] - make_plot(profile,per,title,onto,plotopt) - text_output=paste("goProfiles_",onto,"_",title,".tsv",sep="",collapse="") - profile = as.data.frame(profile) - profile <- as.data.frame(apply(profile, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" and " " to NA - write.table(profile, text_output, sep="\t", row.names = FALSE, quote=FALSE, col.names = T) + onto <- names(profiles[index]) + profile <- profiles[index] + make_plot(profile, per, title, onto, plotopt) + text_output <- paste("goProfiles_", onto, "_", + title, ".tsv", sep = "", collapse = "") + profile <- as.data.frame(profile) + profile <- as.data.frame(apply(profile, c(1, 2), + function(x) gsub("^$|^ $", NA, x))) #convert "" and " " to NA + write.table(profile, text_output, sep = "\t", + row.names = FALSE, quote = FALSE, col.names = T) } }