comparison goprofiles.R @ 8:386145573c19 draft

planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
author proteore
date Tue, 18 Dec 2018 09:54:57 -0500
parents 3e138d54c105
children 948fecb6a40b
comparison
equal deleted inserted replaced
7:3e138d54c105 8:386145573c19
2 2
3 # Load necessary libraries 3 # Load necessary libraries
4 suppressMessages(library(goProfiles,quietly = TRUE)) 4 suppressMessages(library(goProfiles,quietly = TRUE))
5 5
6 # Read file and return file content as data.frame 6 # Read file and return file content as data.frame
7 readfile = function(filename, header) { 7 read_file <- function(path,header){
8 if (header == "true") { 8 file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE)
9 # Read only first line of the file as header: 9 if (inherits(file,"try-error")){
10 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") 10 stop("File not found !")
11 #Read the data of the files (skipping the first row) 11 }else{
12 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") 12 return(file)
13 # Remove empty rows
14 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
15 #And assign the header to the data
16 names(file) <- headers
17 } 13 }
18 else { 14 }
19 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") 15
20 # Remove empty rows 16 #convert a string to boolean
21 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] 17 str2bool <- function(x){
18 if (any(is.element(c("t","true"),tolower(x)))){
19 return (TRUE)
20 }else if (any(is.element(c("f","false"),tolower(x)))){
21 return (FALSE)
22 }else{
23 return(NULL)
22 } 24 }
23 return(file)
24 } 25 }
25 26
26 check_ids <- function(vector,type) { 27 check_ids <- function(vector,type) {
27 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" 28 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
28 entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" 29 entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
47 48
48 if (species=="org.Hs.eg.db"){ 49 if (species=="org.Hs.eg.db"){
49 package=org.Hs.eg.db 50 package=org.Hs.eg.db
50 } else if (species=="org.Mm.eg.db"){ 51 } else if (species=="org.Mm.eg.db"){
51 package=org.Mm.eg.db 52 package=org.Mm.eg.db
53 } else if (species=="org.Rn.eg.db"){
54 package=org.Rn.eg.db
52 } 55 }
53
54
55 56
56 # Check if level is number 57 # Check if level is number
57 if (! as.numeric(level) %% 1 == 0) { 58 if (! as.numeric(level) %% 1 == 0) {
58 stop("Please enter an integer for level") 59 stop("Please enter an integer for level")
59 } else { 60 } else {
73 } 74 }
74 print(id[[1]]) 75 print(id[[1]])
75 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] 76 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
76 # IDs that have NA ENTREZID 77 # IDs that have NA ENTREZID
77 NAs = id$UNIPROT[which(is.na(id$ENTREZID))] 78 NAs = id$UNIPROT[which(is.na(id$ENTREZID))]
78 print("IDs unable to convert to ENTREZID: ") 79 #print("IDs unable to convert to ENTREZID: ")
79 print(NAs) 80 #print(NAs)
80 } 81 }
81 82
82 # Create basic profiles 83 # Create basic profiles
83 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) 84 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
84 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T) 85 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
89 # printProfiles(profile) 90 # printProfiles(profile)
90 91
91 return(c(profile.CC, profile.MF, profile.BP, profile.ALL)) 92 return(c(profile.CC, profile.MF, profile.BP, profile.ALL))
92 } 93 }
93 94
94 # Plot profiles to PNG 95 make_plot <- function(profile,percent,title,onto,plot_opt){
95 plotPNG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { 96
96 if (!is.null(profile.CC)) { 97 if (plot_opt == "PDF") {
97 png("profile.CC.png") 98 file_name=paste("profile_",onto,".pdf",collapse="",sep="")
98 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) 99 pdf(file_name)
99 dev.off() 100 } else if (plot_opt == "JPEG"){
101 file_name=paste("profile_",onto,".jpeg",collapse="",sep="")
102 jpeg(file_name)
103 } else if (plot_opt == "PNG"){
104 file_name=paste("profile_",onto,".png",collapse="",sep="")
105 png(file_name)
100 } 106 }
101 if (!is.null(profile.BP)) { 107 plotProfiles(profile, percentage=percent, multiplePlots=FALSE, aTitle=title)
102 png("profile.BP.png") 108 dev.off()
103 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
104 dev.off()
105 }
106 if (!is.null(profile.MF)) {
107 png("profile.MF.png")
108 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
109 dev.off()
110 }
111 if (!is.null(profile.ALL)) {
112 png("profile.ALL.png")
113 plotProfiles(profile.ALL, percentage=per, multiplePlots=T, aTitle=title)
114 dev.off()
115 }
116 }
117
118 # Plot profiles to JPEG
119 plotJPEG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
120 if (!is.null(profile.CC)) {
121 jpeg("profile.CC.jpeg")
122 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
123 dev.off()
124 }
125 if (!is.null(profile.BP)) {
126 jpeg("profile.BP.jpeg")
127 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
128 dev.off()
129 }
130 if (!is.null(profile.MF)) {
131 jpeg("profile.MF.jpeg")
132 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
133 dev.off()
134 }
135 if (!is.null(profile.ALL)) {
136 jpeg("profile.ALL.jpeg")
137 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
138 dev.off()
139 }
140 }
141
142 # Plot profiles to PDF
143 plotPDF = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
144 if (!is.null(profile.CC)) {
145 pdf("profile.CC.pdf")
146 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
147 dev.off()
148 }
149 if (!is.null(profile.BP)) {
150 pdf("profile.BP.pdf")
151 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
152 dev.off()
153 }
154 if (!is.null(profile.MF)) {
155 pdf("profile.MF.pdf")
156 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
157 dev.off()
158 }
159 if (!is.null(profile.ALL)) {
160 #print("all")
161 pdf("profile.ALL.pdf")
162 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
163 dev.off()
164 }
165 } 109 }
166 110
167 goprofiles = function() { 111 goprofiles = function() {
168 args <- commandArgs(TRUE) 112 args <- commandArgs(TRUE)
169 if(length(args)<1) { 113 if(length(args)<1) {
210 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { 154 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
211 stop("Please enter an integer for level") 155 stop("Please enter an integer for level")
212 } else { 156 } else {
213 ncol = as.numeric(gsub("c", "", ncol)) 157 ncol = as.numeric(gsub("c", "", ncol))
214 } 158 }
215 header = args$header 159 header = str2bool(args$header)
216 # Get file content 160 # Get file content
217 file = readfile(filename, header) 161 file = read_file(filename, header)
218 # Extract Protein IDs list 162 # Extract Protein IDs list
219 input = unlist(strsplit(as.character(file[,ncol]),";")) 163 input = unlist(strsplit(as.character(file[,ncol]),";"))
220 input = input [which(!is.na(input))] 164 input = input [which(!is.na(input))]
221 } 165 }
222 166
223 if (! any(check_ids(input,id_type))){ 167 if (! any(check_ids(input,id_type))){
224 stop(paste(id_type,"not found in your ids list, please check your IDs in input or the selected column of your input file")) 168 stop(paste(id_type,"not found in your ids list, please check your IDs in input or the selected column of your input file"))
225 } 169 }
226 170
227 ontoopt = strsplit(args$onto_opt, ",")[[1]] 171 ontoopt = strsplit(args$onto_opt, ",")[[1]]
228 #print(ontoopt) 172 onto_pos = as.integer(gsub("BP",3,gsub("MF",2,gsub("CC",1,ontoopt))))
229 #plotopt = strsplit(args[3], ",")
230 plotopt = args$plot_opt 173 plotopt = args$plot_opt
231 level = args$level 174 level = args$level
232 per = as.logical(args$per) 175 per = as.logical(args$per)
233 title = args$title 176 title = args$title
234 duplicate = args$duplicate 177 duplicate = args$duplicate
235 text_output = args$text_output 178 text_output = args$text_output
236 species=args$species 179 species=args$species
237 180
238 profiles = getprofile(input, id_type, level, duplicate,species) 181 profiles = getprofile(input, id_type, level, duplicate,species)
239 profile.CC = profiles[1] 182
240 #print(profile.CC) 183 for (index in onto_pos) {
241 profile.MF = profiles[2] 184 onto = names(profiles[index])
242 #print(profile.MF) 185 profile=profiles[index]
243 profile.BP = profiles[3] 186 make_plot(profile,per,title,onto,plotopt)
244 #print(profile.BP) 187 text_output=paste("goProfiles_",onto,"_",title,".tsv",sep="",collapse="")
245 profile.ALL = profiles[-3:-1] 188 profile = as.data.frame(profile)
246 #print(profile.ALL) 189 profile <- as.data.frame(apply(profile, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" and " " to NA
247 #c(profile.ALL, profile.CC, profile.MF, profile.BP) 190 write.table(profile, text_output, sep="\t", row.names = FALSE, quote=FALSE, col.names = T)
248
249 if ("CC" %in% ontoopt) {
250 write.table(profile.CC, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
251 if (grepl("PNG", plotopt)) {
252 plotPNG(profile.CC=profile.CC, per=per, title=title)
253 }
254 if (grepl("JPEG", plotopt)) {
255 plotJPEG(profile.CC = profile.CC, per=per, title=title)
256 }
257 if (grepl("PDF", plotopt)) {
258 plotPDF(profile.CC = profile.CC, per=per, title=title)
259 }
260 }
261 if ("MF" %in% ontoopt) {
262 write.table(profile.MF, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
263 if (grepl("PNG", plotopt)) {
264 plotPNG(profile.MF = profile.MF, per=per, title=title)
265 }
266 if (grepl("JPEG", plotopt)) {
267 plotJPEG(profile.MF = profile.MF, per=per, title=title)
268 }
269 if (grepl("PDF", plotopt)) {
270 plotPDF(profile.MF = profile.MF, per=per, title=title)
271 }
272 }
273 if ("BP" %in% ontoopt) {
274 write.table(profile.BP, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
275 if (grepl("PNG", plotopt)) {
276 plotPNG(profile.BP = profile.BP, per=per, title=title)
277 }
278 if (grepl("JPEG", plotopt)) {
279 plotJPEG(profile.BP = profile.BP, per=per, title=title)
280 }
281 if (grepl("PDF", plotopt)) {
282 plotPDF(profile.BP = profile.BP, per=per, title=title)
283 }
284 } 191 }
285 } 192 }
286 193
287 goprofiles() 194 goprofiles()