diff goprofiles.R @ 5:781072a65600 draft

planemo upload commit 0ce4c81e6d2f7af8c9b52f6c07e83b0319c2adb1-dirty
author proteore
date Wed, 19 Sep 2018 05:49:06 -0400
parents 715002a394ec
children 6afe8166a9a4
line wrap: on
line diff
--- a/goprofiles.R	Fri Mar 23 10:47:17 2018 -0400
+++ b/goprofiles.R	Wed Sep 19 05:49:06 2018 -0400
@@ -1,6 +1,5 @@
 # Load necessary libraries
-library(org.Hs.eg.db)
-library(goProfiles)
+library(goProfiles,quietly = TRUE)
 
 # Read file and return file content as data.frame
 readfile = function(filename, header) {
@@ -22,32 +21,51 @@
   return(file)
 }
 
-getprofile = function(ids, id_type, level, duplicate) {
+# Flag which IDs in `vector` are well-formed for `type` ("Entrez" or "UniProt");
+# returns a logical vector (one per ID), or NULL for any other `type`.
+check_ids <- function(vector,type) {
+  uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
+  # Group the alternatives so ^ and $ anchor each one (was a stray ' instead of "(").
+  entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
+  if (type == "Entrez") return(grepl(entrez_id,vector))
+  if (type == "UniProt") return(grepl(uniprot_pattern,vector))
+}
+
+getprofile = function(ids, id_type, level, duplicate,species) {
   ####################################################################
   # Arguments
   #   - ids: list of input IDs
   #   - id_type: type of input IDs (UniProt/ENTREZID)
   #   - level
   #   - duplicate: if the duplicated IDs should be removed or not (TRUE/FALSE)
+  #   - species
   ####################################################################
   
+  library(species, character.only = TRUE, quietly = TRUE)
+  
+  if (species=="org.Hs.eg.db"){
+    package=org.Hs.eg.db
+  } else if (species=="org.Mm.eg.db"){
+    package=org.Mm.eg.db
+  }
+  
+  
+  
   # Check if level is number
   if (! as.numeric(level) %% 1 == 0) {
     stop("Please enter an integer for level")
-  }
-  else {
+  } else {
     level = as.numeric(level)
   }
   #genes = as.vector(file[,ncol])
   
   # Extract Gene Entrez ID
   if (id_type == "Entrez") {
-    id = select(org.Hs.eg.db, ids, "ENTREZID", multiVals = "first")
+    id = select(package, ids, "ENTREZID", multiVals = "first")
     genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
-  }
-  else {
+  } else {
     genes_ids = c()
-    id = select(org.Hs.eg.db, ids, "ENTREZID", "UNIPROT", multiVals = "first")
+    id = select(package, ids, "ENTREZID", "UNIPROT", multiVals = "first")
     if (duplicate == "TRUE") {
       id = unique(id)
     }
@@ -60,10 +78,10 @@
   }
   
   # Create basic profiles
-  profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
-  profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
-  profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
-  profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
+  profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
+  profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
+  profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
+  profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
   
   # Print profile
   # printProfiles(profile)
@@ -165,7 +183,8 @@
         --per
         --title: title of the plot
         --duplicate: remove dupliate input IDs (true/false)
-        --text_output: text output filename \n")
+        --text_output: text output filename \n
+        --species")
     q(save="no")
   }
   
@@ -175,18 +194,20 @@
   args <- as.list(as.character(argsDF$V2))
   names(args) <- argsDF$V1
 
+  #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/goprofiles/args.Rda")
+  #load("/home/dchristiany/proteore_project/ProteoRE/tools/goprofiles/args.Rda")
+  
+  id_type = args$id_type
   input_type = args$input_type
   if (input_type == "text") {
     input = strsplit(args$input, "[ \t\n]+")[[1]]
-  }
-  else if (input_type == "file") {
+  } else if (input_type == "file") {
     filename = args$input
     ncol = args$ncol
     # Check ncol
     if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
       stop("Please enter an integer for level")
-    }
-    else {
+    } else {
       ncol = as.numeric(gsub("c", "", ncol))
     }
     header = args$header
@@ -198,7 +219,11 @@
       input = c(input, strsplit(row, ";")[[1]][1])
     }
   }
-  id_type = args$id_type
+  
+  if (! any(check_ids(input,id_type))){
+    stop(paste(id_type,"not found in your ids list, please check your IDs in input or the selected column of your input file"))
+  }
+  
   ontoopt = strsplit(args$onto_opt, ",")[[1]]
   #print(ontoopt)
   #plotopt = strsplit(args[3], ",")
@@ -208,8 +233,9 @@
   title = args$title
   duplicate = args$duplicate
   text_output = args$text_output
+  species=args$species
 
-  profiles = getprofile(input, id_type, level, duplicate)
+  profiles = getprofile(input, id_type, level, duplicate,species)
   profile.CC = profiles[1]
   #print(profile.CC)
   profile.MF = profiles[2]