Mercurial > repos > proteore > proteore_goprofiles
diff goprofiles.R @ 5:781072a65600 draft
planemo upload commit 0ce4c81e6d2f7af8c9b52f6c07e83b0319c2adb1-dirty
author | proteore |
---|---|
date | Wed, 19 Sep 2018 05:49:06 -0400 |
parents | 715002a394ec |
children | 6afe8166a9a4 |
line wrap: on
line diff
--- a/goprofiles.R Fri Mar 23 10:47:17 2018 -0400 +++ b/goprofiles.R Wed Sep 19 05:49:06 2018 -0400 @@ -1,6 +1,5 @@ # Load necessary libraries -library(org.Hs.eg.db) -library(goProfiles) +library(goProfiles,quietly = TRUE) # Read file and return file content as data.frame readfile = function(filename, header) { @@ -22,32 +21,51 @@ return(file) } -getprofile = function(ids, id_type, level, duplicate) { +check_ids <- function(vector,type) { + uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" + entrez_id = "^'[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" + if (type == "Entrez"){ + return(grepl(entrez_id,vector)) + } else if (type == "UniProt") { + return(grepl(uniprot_pattern,vector)) + } +} + +getprofile = function(ids, id_type, level, duplicate,species) { #################################################################### # Arguments # - ids: list of input IDs # - id_type: type of input IDs (UniProt/ENTREZID) # - level # - duplicate: if the duplicated IDs should be removed or not (TRUE/FALSE) + # - species #################################################################### + library(species, character.only = TRUE, quietly = TRUE) + + if (species=="org.Hs.eg.db"){ + package=org.Hs.eg.db + } else if (species=="org.Mm.eg.db"){ + package=org.Mm.eg.db + } + + + # Check if level is number if (! as.numeric(level) %% 1 == 0) { stop("Please enter an integer for level") - } - else { + } else { level = as.numeric(level) } #genes = as.vector(file[,ncol]) # Extract Gene Entrez ID if (id_type == "Entrez") { - id = select(org.Hs.eg.db, ids, "ENTREZID", multiVals = "first") + id = select(package, ids, "ENTREZID", multiVals = "first") genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] - } - else { + } else { genes_ids = c() - id = select(org.Hs.eg.db, ids, "ENTREZID", "UNIPROT", multiVals = "first") + id = select(package, ids, "ENTREZID", "UNIPROT", multiVals = "first") if (duplicate == "TRUE") { id = unique(id) } @@ -60,10 +78,10 @@ } # Create basic profiles - profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) - profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) - profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) - profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) + profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) + profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) + profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) + profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) # Print profile # printProfiles(profile) @@ -165,7 +183,8 @@ --per --title: title of the plot --duplicate: remove dupliate input IDs (true/false) - --text_output: text output filename \n") + --text_output: text output filename \n + --species") q(save="no") } @@ -175,18 +194,20 @@ args <- as.list(as.character(argsDF$V2)) names(args) <- argsDF$V1 + #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/goprofiles/args.Rda") + #load("/home/dchristiany/proteore_project/ProteoRE/tools/goprofiles/args.Rda") + + id_type = args$id_type input_type = args$input_type if (input_type == "text") { input = strsplit(args$input, "[ \t\n]+")[[1]] - } - else if (input_type == "file") { + } else if (input_type == "file") { filename = args$input ncol = args$ncol # Check ncol if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { stop("Please enter an integer for level") - } - else { + } else { ncol = as.numeric(gsub("c", "", ncol)) } header = args$header @@ -198,7 +219,11 @@ input = c(input, strsplit(row, ";")[[1]][1]) } } - id_type = args$id_type + + if (! any(check_ids(input,id_type))){ + stop(paste(id_type,"not found in your ids list, please check your IDs in input or the selected column of your input file")) + } + ontoopt = strsplit(args$onto_opt, ",")[[1]] #print(ontoopt) #plotopt = strsplit(args[3], ",") @@ -208,8 +233,9 @@ title = args$title duplicate = args$duplicate text_output = args$text_output + species=args$species - profiles = getprofile(input, id_type, level, duplicate) + profiles = getprofile(input, id_type, level, duplicate,species) profile.CC = profiles[1] #print(profile.CC) profile.MF = profiles[2]