annotate goprofiles.R @ 11:3ddc1f78773d draft

planemo upload commit 4ba1ebe7b3f5e3fabf78b5fed7ed0b92e2cbf9e5-dirty
author proteore
date Fri, 28 Jun 2019 05:11:15 -0400
parents 2138e0035e57
children 601027649251
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
6afe8166a9a4 planemo upload commit 4e898239997b7ef266b1d0ce0a4c7cdec31b4ecd-dirty
proteore
parents: 5
diff changeset
1 options(warn=-1) # Turn off warnings for the whole script run (a negative warn level makes R ignore all warnings)
6afe8166a9a4 planemo upload commit 4e898239997b7ef266b1d0ce0a4c7cdec31b4ecd-dirty
proteore
parents: 5
diff changeset
2
0
d89c09253c8d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
3 # Load necessary libraries
6
6afe8166a9a4 planemo upload commit 4e898239997b7ef266b1d0ce0a4c7cdec31b4ecd-dirty
proteore
parents: 5
diff changeset
4 suppressMessages(library(goProfiles,quietly = TRUE))
0
d89c09253c8d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
5
4
715002a394ec planemo upload commit b36435833bf54f90f62cc240f2cda1c889161b23-dirty
proteore
parents: 2
diff changeset
6 # Read file and return file content as data.frame
8
386145573c19 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents: 7
diff changeset
read_file <- function(path,header){
  # Read a tab-separated file and return its content as a data.frame.
  #
  # Arguments
  # - path: path of the file to read
  # - header: TRUE/FALSE, whether the first line holds the column names
  #
  # Returns the data.frame built by read.csv: strings are kept as characters
  # and column names are left untouched (check.names = FALSE).
  #
  # Stops with "File not found !" when the path does not point to an existing
  # file, and with the underlying read.csv message for any other failure, so
  # that an empty or malformed file is not misreported as a missing one.
  content <- try(
    read.csv(path, header = header, sep = "\t", stringsAsFactors = FALSE,
             quote = "\"", check.names = FALSE),
    silent = TRUE
  )
  if (!inherits(content, "try-error")) {
    return(content)
  }

  path_is_missing <- !is.character(path) || length(path) != 1 ||
    is.na(path) || !file.exists(path)
  if (path_is_missing) {
    stop("File not found !")
  }

  # The file exists, so report why it could not be parsed.
  reason <- conditionMessage(attr(content, "condition"))
  stop(paste0("Unable to read file '", path, "': ", reason))
}
386145573c19 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents: 7
diff changeset
15
386145573c19 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents: 7
diff changeset
16 #convert a string to boolean
386145573c19 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents: 7
diff changeset
str2bool <- function(x){
  # Translate a textual flag into a logical value, ignoring case.
  #
  # Arguments
  # - x: character value(s) to interpret
  #
  # Returns TRUE when "t" or "true" is found among the lower-cased values,
  # FALSE when "f" or "false" is found, and NULL when neither is present.
  # The "true" spellings are tested first, so they win if both kinds occur.
  lowered <- tolower(x)

  if (any(c("t", "true") %in% lowered)) {
    return(TRUE)
  }
  if (any(c("f", "false") %in% lowered)) {
    return(FALSE)
  }
  NULL
}
d89c09253c8d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
26
5
781072a65600 planemo upload commit 0ce4c81e6d2f7af8c9b52f6c07e83b0319c2adb1-dirty
proteore
parents: 4
diff changeset
check_ids <- function(vector,type) {
  # Flag which identifiers look like IDs of the requested type.
  #
  # Arguments
  # - vector: character vector of identifiers to test
  # - type: "Entrez" or "UniProt"
  #
  # Returns a logical vector (one value per identifier) telling whether the
  # identifier matches the pattern for that type. Any other type yields an
  # invisible NULL.
  patterns <- c(
    Entrez = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$",
    UniProt = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
  )

  if (type == "Entrez") {
    return(grepl(patterns[["Entrez"]], vector))
  }
  if (type == "UniProt") {
    return(grepl(patterns[["UniProt"]], vector))
  }
  invisible(NULL)
}
781072a65600 planemo upload commit 0ce4c81e6d2f7af8c9b52f6c07e83b0319c2adb1-dirty
proteore
parents: 4
diff changeset
36
781072a65600 planemo upload commit 0ce4c81e6d2f7af8c9b52f6c07e83b0319c2adb1-dirty
proteore
parents: 4
diff changeset
getprofile <- function(ids, id_type, level, duplicate,species) {
  ####################################################################
  # Build the GO basic profiles (CC, MF, BP and ANY) for a list of IDs.
  #
  # Arguments
  # - ids: list of input IDs
  # - id_type: type of input IDs ("Entrez"; any other value is treated
  #   as UniProt and mapped to Entrez gene IDs)
  # - level: GO level, an integer or a string holding one
  # - duplicate: if the duplicated IDs should be removed or not (TRUE/FALSE);
  #   anything other than TRUE leaves the mapping untouched
  # - species: name of the annotation package to use
  #   (org.Hs.eg.db, org.Mm.eg.db or org.Rn.eg.db)
  #
  # Returns the concatenation c(CC, MF, BP, ANY) of the lists returned by
  # basicProfile, in that order.
  #
  # Stops when level is not an integer or species is not supported. Both
  # checks run before the annotation package is loaded so that bad
  # arguments fail fast with a clear message.
  ####################################################################

  # Check if level is an integer (NA covers non-numeric text)
  level_value <- suppressWarnings(as.numeric(level))
  if (length(level_value) != 1 || is.na(level_value) || level_value %% 1 != 0) {
    stop("Please enter an integer for level")
  }
  level <- level_value

  supported_species <- c("org.Hs.eg.db", "org.Mm.eg.db", "org.Rn.eg.db")
  if (!is.character(species) || length(species) != 1 ||
      !species %in% supported_species) {
    stop(paste0("Unsupported species package: ",
                paste(species, collapse = ", "),
                " (expected one of ",
                paste(supported_species, collapse = ", "), ")"))
  }

  library(species, character.only = TRUE, quietly = TRUE)

  # Annotation database object attached by the package loaded just above
  package <- switch(species,
    "org.Hs.eg.db" = org.Hs.eg.db,
    "org.Mm.eg.db" = org.Mm.eg.db,
    "org.Rn.eg.db" = org.Rn.eg.db
  )

  # Extract Gene Entrez ID
  if (id_type == "Entrez") {
    id <- select(package, ids, "ENTREZID", multiVals = "first")
  } else {
    id <- select(package, ids, "ENTREZID", "UNIPROT", multiVals = "first")
  }
  if (isTRUE(duplicate)) {
    id <- unique(id)
  }
  # Keep only the IDs that could be mapped to an Entrez gene ID
  genes_ids <- id$ENTREZID[!is.na(id$ENTREZID)]

  # Create basic profiles
  profile_cc <- basicProfile(genes_ids, onto = "CC", level = level,
                             orgPackage = species, empty.cats = FALSE,
                             ord = TRUE, na.rm = TRUE)
  profile_bp <- basicProfile(genes_ids, onto = "BP", level = level,
                             orgPackage = species, empty.cats = FALSE,
                             ord = TRUE, na.rm = TRUE)
  profile_mf <- basicProfile(genes_ids, onto = "MF", level = level,
                             orgPackage = species, empty.cats = FALSE,
                             ord = TRUE, na.rm = TRUE)
  profile_all <- basicProfile(genes_ids, onto = "ANY", level = level,
                              orgPackage = species, empty.cats = FALSE,
                              ord = TRUE, na.rm = TRUE)

  # Order matters: the caller picks CC, MF and BP by position 1, 2 and 3.
  return(c(profile_cc, profile_mf, profile_bp, profile_all))
}
d89c09253c8d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
85
9
948fecb6a40b planemo upload commit 973d782455fd6a7df7ba9bce0a7878de53bc1e68-dirty
proteore
parents: 8
diff changeset
86 # return the width and height (in that order) of the plot in inches, derived from the profile
948fecb6a40b planemo upload commit 973d782455fd6a7df7ba9bce0a7878de53bc1e68-dirty
proteore
parents: 8
diff changeset
plot_size_from_nb_onto <- function(profile){
  # Derive the plot dimensions (in inches) from the size of a profile.
  #
  # Arguments
  # - profile: list whose first element is a table with one row per
  #   ontology term
  #
  # Returns c(width, height). The width is always 10. The height grows by
  # 3 for every 25 rows (rounded to the nearest multiple of 25, with a
  # minimum of 25 rows), starting at 5, and is fixed at 250 once the
  # rounded row count exceeds 2000.
  plot_width <- 10

  # Number of 25-row bins, i.e. the rounded row count divided by 25
  bins <- round(nrow(profile[[1]]) / 25)
  if (bins < 1) {
    bins <- 1
  }

  # More than 80 bins means more than 2000 rows: use the fixed height
  if (bins > 80) {
    return(c(plot_width, 250))
  }

  # 5 for the first bin, then 3 more per additional bin
  plot_height <- as.integer(2 + 3 * bins)
  c(plot_width, plot_height)
}
948fecb6a40b planemo upload commit 973d782455fd6a7df7ba9bce0a7878de53bc1e68-dirty
proteore
parents: 8
diff changeset
100
8
386145573c19 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents: 7
diff changeset
make_plot <- function(profile,percent,title,onto,plot_opt){
  # Plot one GO profile into a file named profile_<onto>.<extension>
  # in the current working directory.
  #
  # Arguments
  # - profile: profile to plot, passed as is to plotProfiles
  # - percent: passed to plotProfiles as its percentage argument
  # - title: plot title
  # - onto: ontology name, used to build the output file name
  # - plot_opt: output format, one of "PDF", "JPEG" or "PNG"
  #
  # Returns the value of dev.off().
  #
  # Stops when plot_opt is not a supported format: without this check no
  # device would be opened and the plot would go to the default device
  # instead of the expected file.
  supported_formats <- c("PDF", "JPEG", "PNG")
  if (!is.character(plot_opt) || length(plot_opt) != 1 ||
      !plot_opt %in% supported_formats) {
    stop(paste0("Unsupported plot_opt: ", paste(plot_opt, collapse = ", "),
                " (expected one of ",
                paste(supported_formats, collapse = ", "), ")"))
  }

  size <- plot_size_from_nb_onto(profile)
  width <- size[1]
  height <- size[2]

  if (plot_opt == "PDF") {
    file_name <- paste0("profile_", onto, ".pdf", collapse = "")
    pdf(file_name, width = width, height = height)
  } else if (plot_opt == "JPEG") {
    file_name <- paste0("profile_", onto, ".jpeg", collapse = "")
    jpeg(file_name, width = width, height = height, units = "in", res = 100)
  } else {
    file_name <- paste0("profile_", onto, ".png", collapse = "")
    png(file_name, width = width, height = height, units = "in", res = 100)
  }

  # Make sure the device is closed even if plotting fails.
  device_closed <- FALSE
  on.exit(if (!device_closed) dev.off(), add = TRUE)

  plotProfiles(profile, percentage = percent, multiplePlots = FALSE,
               aTitle = title)

  device_closed <- TRUE
  dev.off()
}
d89c09253c8d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
120
d89c09253c8d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
goprofiles <- function() {
  # Command-line entry point of the tool.
  #
  # Reads the "--name=value" arguments of the script, collects the input
  # IDs (from a text list or from a column of a tab-separated file),
  # checks that at least one of them looks like an ID of the requested
  # type, builds the GO profiles and, for every requested ontology,
  # writes a plot (profile_<onto>.<ext>) and a table
  # (goProfiles_<onto>_<title>.tsv) in the current working directory.
  #
  # Prints the help and quits when called without argument or with --help.
  cli_args <- commandArgs(TRUE)
  if (length(cli_args) < 1) {
    cli_args <- c("--help")
  }

  # Help section
  if ("--help" %in% cli_args) {
    cat("goProfiles: GO profiles of a list of IDs",
        "Arguments:",
        "--input_type: type of input (list of id or filename)",
        "--input: input",
        "--ncol: the column number which you would like to apply...",
        "--header: true/false if your file contains a header",
        "--id_type: the type of input IDs (UniProt/EntrezID)",
        "--onto_opt: ontology options",
        "--plot_opt: plot extension options (PDF/JPEG/PNG)",
        "--level: 1-3",
        "--per",
        "--title: title of the plot",
        "--duplicate: remove dupliate input IDs (true/false)",
        "--text_output: text output filename \n",
        "--species",
        sep = "\n")
    q(save = "no")
  }

  # Parse arguments: split each "--name=value" on the FIRST "=" only, so
  # that values containing "=" or empty values are kept intact. Arguments
  # given without "=" get an NA value.
  parse_args <- function(raw) {
    stripped <- sub("^--", "", raw)
    eq_pos <- as.integer(regexpr("=", stripped, fixed = TRUE))
    has_value <- eq_pos > 0
    keys <- ifelse(has_value, substr(stripped, 1, eq_pos - 1), stripped)
    values <- ifelse(has_value,
                     substr(stripped, eq_pos + 1, nchar(stripped)),
                     NA_character_)
    stats::setNames(as.list(values), keys)
  }
  # Values are read with [[ ]] below: $ would partially match names, e.g.
  # "input" would silently resolve to "input_type" if --input were absent.
  args <- parse_args(cli_args)

  id_type <- args[["id_type"]]
  input_type <- args[["input_type"]]
  if (identical(input_type, "text")) {
    # IDs may be separated by whitespace and/or ";"
    input <- unlist(strsplit(strsplit(args[["input"]], "[ \t\n]+")[[1]], ";"))
  } else if (identical(input_type, "file")) {
    filename <- args[["input"]]
    # Check ncol (given as a number, optionally prefixed with "c")
    col_index <- suppressWarnings(as.numeric(gsub("c", "", args[["ncol"]])))
    if (length(col_index) != 1 || is.na(col_index) || col_index %% 1 != 0) {
      stop("Please enter an integer for ncol")
    }
    header <- str2bool(args[["header"]])
    if (is.null(header)) {
      stop("Please enter true or false for header")
    }
    # Get file content
    file <- read_file(filename, header)
    # Extract Protein IDs list
    input <- unlist(strsplit(as.character(file[, col_index]), ";"))
  } else {
    stop(paste0("Unknown input_type: ", paste(input_type, collapse = ", "),
                " (expected text or file)"))
  }
  # Drop missing values and literal "NA" entries only; IDs that merely
  # contain "NA" (e.g. Q8NA58) must be kept.
  input <- input[!is.na(input) & input != "NA"]

  if (! any(check_ids(input, id_type))) {
    stop(paste(id_type,"not found in your ids list, please check your IDs in input or the selected column of your input file"))
  }

  # Position of each requested ontology in the list returned by getprofile
  ontoopt <- trimws(strsplit(args[["onto_opt"]], ",")[[1]])
  onto_pos <- match(ontoopt, c("CC", "MF", "BP"))
  if (anyNA(onto_pos)) {
    stop(paste0("Unknown ontology option: ",
                paste(ontoopt[is.na(onto_pos)], collapse = ", "),
                " (expected CC, MF or BP)"))
  }
  plotopt <- args[["plot_opt"]]
  level <- args[["level"]]
  per <- as.logical(args[["per"]])
  title <- args[["title"]]
  duplicate <- str2bool(args[["duplicate"]])
  if (is.null(duplicate)) {
    stop("Please enter true or false for duplicate")
  }
  species <- args[["species"]]

  profiles <- getprofile(input, id_type, level, duplicate, species)

  for (index in onto_pos) {
    onto <- names(profiles[index])
    profile <- profiles[index]
    make_plot(profile, per, title, onto, plotopt)
    # The table name is derived from the ontology and the title; the
    # --text_output argument is not used to name it.
    text_output <- paste("goProfiles_", onto, "_", title, ".tsv", sep = "", collapse = "")
    profile <- as.data.frame(profile)
    profile <- as.data.frame(apply(profile, c(1, 2), function(x) gsub("^$|^ $", NA, x))) #convert "" and " " to NA
    write.table(profile, text_output, sep = "\t", row.names = FALSE, quote = FALSE, col.names = TRUE)
  }
}
d89c09253c8d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
203
d89c09253c8d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
# Script entry point: run the command-line workflow when the file is executed.
204 goprofiles()