comparison goprofiles.R @ 5:781072a65600 draft

planemo upload commit 0ce4c81e6d2f7af8c9b52f6c07e83b0319c2adb1-dirty
author proteore
date Wed, 19 Sep 2018 05:49:06 -0400
parents 715002a394ec
children 6afe8166a9a4
comparison
equal deleted inserted replaced
4:715002a394ec 5:781072a65600
1 # Load necessary libraries 1 # Load necessary libraries
2 library(org.Hs.eg.db) 2 library(goProfiles,quietly = TRUE)
3 library(goProfiles)
4 3
5 # Read file and return file content as data.frame 4 # Read file and return file content as data.frame
6 readfile = function(filename, header) { 5 readfile = function(filename, header) {
7 if (header == "true") { 6 if (header == "true") {
8 # Read only first line of the file as header: 7 # Read only first line of the file as header:
20 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] 19 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
21 } 20 }
22 return(file) 21 return(file)
23 } 22 }
24 23
25 getprofile = function(ids, id_type, level, duplicate) { 24 check_ids <- function(vector,type) {
25 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
26 entrez_id = "^'[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
27 if (type == "Entrez"){
28 return(grepl(entrez_id,vector))
29 } else if (type == "UniProt") {
30 return(grepl(uniprot_pattern,vector))
31 }
32 }
33
34 getprofile = function(ids, id_type, level, duplicate,species) {
26 #################################################################### 35 ####################################################################
27 # Arguments 36 # Arguments
28 # - ids: list of input IDs 37 # - ids: list of input IDs
29 # - id_type: type of input IDs (UniProt/ENTREZID) 38 # - id_type: type of input IDs (UniProt/ENTREZID)
30 # - level 39 # - level
31 # - duplicate: if the duplicated IDs should be removed or not (TRUE/FALSE) 40 # - duplicate: if the duplicated IDs should be removed or not (TRUE/FALSE)
41 # - species
32 #################################################################### 42 ####################################################################
43
44 library(species, character.only = TRUE, quietly = TRUE)
45
46 if (species=="org.Hs.eg.db"){
47 package=org.Hs.eg.db
48 } else if (species=="org.Mm.eg.db"){
49 package=org.Mm.eg.db
50 }
51
52
33 53
34 # Check if level is number 54 # Check if level is number
35 if (! as.numeric(level) %% 1 == 0) { 55 if (! as.numeric(level) %% 1 == 0) {
36 stop("Please enter an integer for level") 56 stop("Please enter an integer for level")
37 } 57 } else {
38 else {
39 level = as.numeric(level) 58 level = as.numeric(level)
40 } 59 }
41 #genes = as.vector(file[,ncol]) 60 #genes = as.vector(file[,ncol])
42 61
43 # Extract Gene Entrez ID 62 # Extract Gene Entrez ID
44 if (id_type == "Entrez") { 63 if (id_type == "Entrez") {
45 id = select(org.Hs.eg.db, ids, "ENTREZID", multiVals = "first") 64 id = select(package, ids, "ENTREZID", multiVals = "first")
46 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] 65 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
47 } 66 } else {
48 else {
49 genes_ids = c() 67 genes_ids = c()
50 id = select(org.Hs.eg.db, ids, "ENTREZID", "UNIPROT", multiVals = "first") 68 id = select(package, ids, "ENTREZID", "UNIPROT", multiVals = "first")
51 if (duplicate == "TRUE") { 69 if (duplicate == "TRUE") {
52 id = unique(id) 70 id = unique(id)
53 } 71 }
54 print(id[[1]]) 72 print(id[[1]])
55 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] 73 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
58 print("IDs unable to convert to ENTREZID: ") 76 print("IDs unable to convert to ENTREZID: ")
59 print(NAs) 77 print(NAs)
60 } 78 }
61 79
62 # Create basic profiles 80 # Create basic profiles
63 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) 81 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
64 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) 82 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
65 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) 83 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
66 profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) 84 profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
67 85
68 # Print profile 86 # Print profile
69 # printProfiles(profile) 87 # printProfiles(profile)
70 88
71 return(c(profile.CC, profile.MF, profile.BP, profile.ALL)) 89 return(c(profile.CC, profile.MF, profile.BP, profile.ALL))
163 --plot_opt: plot extension options (PDF/JPEG/PNG) 181 --plot_opt: plot extension options (PDF/JPEG/PNG)
164 --level: 1-3 182 --level: 1-3
165 --per 183 --per
166 --title: title of the plot 184 --title: title of the plot
167 --duplicate: remove dupliate input IDs (true/false) 185 --duplicate: remove dupliate input IDs (true/false)
168 --text_output: text output filename \n") 186 --text_output: text output filename \n
187 --species")
169 q(save="no") 188 q(save="no")
170 } 189 }
171 190
172 # Parse arguments 191 # Parse arguments
173 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") 192 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
174 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) 193 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
175 args <- as.list(as.character(argsDF$V2)) 194 args <- as.list(as.character(argsDF$V2))
176 names(args) <- argsDF$V1 195 names(args) <- argsDF$V1
177 196
197 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/goprofiles/args.Rda")
198 #load("/home/dchristiany/proteore_project/ProteoRE/tools/goprofiles/args.Rda")
199
200 id_type = args$id_type
178 input_type = args$input_type 201 input_type = args$input_type
179 if (input_type == "text") { 202 if (input_type == "text") {
180 input = strsplit(args$input, "[ \t\n]+")[[1]] 203 input = strsplit(args$input, "[ \t\n]+")[[1]]
181 } 204 } else if (input_type == "file") {
182 else if (input_type == "file") {
183 filename = args$input 205 filename = args$input
184 ncol = args$ncol 206 ncol = args$ncol
185 # Check ncol 207 # Check ncol
186 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { 208 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
187 stop("Please enter an integer for level") 209 stop("Please enter an integer for level")
188 } 210 } else {
189 else {
190 ncol = as.numeric(gsub("c", "", ncol)) 211 ncol = as.numeric(gsub("c", "", ncol))
191 } 212 }
192 header = args$header 213 header = args$header
193 # Get file content 214 # Get file content
194 file = readfile(filename, header) 215 file = readfile(filename, header)
196 input = c() 217 input = c()
197 for (row in as.character(file[,ncol])) { 218 for (row in as.character(file[,ncol])) {
198 input = c(input, strsplit(row, ";")[[1]][1]) 219 input = c(input, strsplit(row, ";")[[1]][1])
199 } 220 }
200 } 221 }
201 id_type = args$id_type 222
223 if (! any(check_ids(input,id_type))){
224 stop(paste(id_type,"not found in your ids list, please check your IDs in input or the selected column of your input file"))
225 }
226
202 ontoopt = strsplit(args$onto_opt, ",")[[1]] 227 ontoopt = strsplit(args$onto_opt, ",")[[1]]
203 #print(ontoopt) 228 #print(ontoopt)
204 #plotopt = strsplit(args[3], ",") 229 #plotopt = strsplit(args[3], ",")
205 plotopt = args$plot_opt 230 plotopt = args$plot_opt
206 level = args$level 231 level = args$level
207 per = as.logical(args$per) 232 per = as.logical(args$per)
208 title = args$title 233 title = args$title
209 duplicate = args$duplicate 234 duplicate = args$duplicate
210 text_output = args$text_output 235 text_output = args$text_output
211 236 species=args$species
212 profiles = getprofile(input, id_type, level, duplicate) 237
238 profiles = getprofile(input, id_type, level, duplicate,species)
213 profile.CC = profiles[1] 239 profile.CC = profiles[1]
214 #print(profile.CC) 240 #print(profile.CC)
215 profile.MF = profiles[2] 241 profile.MF = profiles[2]
216 #print(profile.MF) 242 #print(profile.MF)
217 profile.BP = profiles[3] 243 profile.BP = profiles[3]