comparison goprofiles.R @ 0:d89c09253c8d draft

planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
author proteore
date Sun, 26 Nov 2017 19:19:39 -0500
parents
children 1236ee08ccb8
comparison
equal deleted inserted replaced
-1:000000000000 0:d89c09253c8d
1 # Load necessary libraries
2 library("org.Hs.eg.db", quietly=TRUE)
3 library("goProfiles", quietly=TRUE)
4
5 # Read file and return file content as data.frame?
6 readfile = function(filename, header) {
7 if (header == "true") {
8 # Read only the first two lines of the files as data (without headers):
9 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
10 #print("header")
11 #print(headers)
12 # Create the headers names with the two (or more) first rows, sappy allows to make operations over the columns (in this case paste) - read more about sapply here :
13 #headers_names <- sapply(headers, paste, collapse = "_")
14 #print(headers_names)
15 #Read the data of the files (skipping the first 2 rows):
16 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
17 #print(file[1,])
18 #And assign the headers of step two to the data:
19 names(file) <- headers
20 }
21 else {
22 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
23 }
24 return(file)
25 }
26
27 #filename = "/Users/LinCun/Documents/ProteoRE/usecase1/Check/HPA.Selection.134.txt"
28 #test = readfile(filename)
29 #str(test)
30 #str(test$Gene.names)
31 getprofile = function(ids, id_type, level, duplicate) {
32 ####################################################################
33 # Arguments
34 # - ids: list of input IDs
35 # - id_type: type of input IDs (UniProt/ENTREZID)
36 # - level
37 # - duplicate: if the duplicated IDs should be removed or not (TRUE/FALSE)
38 ####################################################################
39
40 # Check if level is number
41 if (! as.numeric(level) %% 1 == 0) {
42 stop("Please enter an integer for level")
43 }
44 else {
45 level = as.numeric(level)
46 }
47 #genes = as.vector(file[,ncol])
48
49 # Extract Gene Entrez ID
50 if (id_type == "Entrez") {
51 id = select(org.Hs.eg.db, ids, "ENTREZID", multiVals = "first")
52 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
53 }
54 else {
55 genes_ids = c()
56 id = select(org.Hs.eg.db, ids, "ENTREZID", "UNIPROT", multiVals = "first")
57 if (duplicate == "TRUE") {
58 id = unique(id)
59 }
60 print(id[[1]])
61 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
62 # IDs that have NA ENTREZID
63 NAs = id$UNIPROT[which(is.na(id$ENTREZID))]
64 print("IDs unable to convert to ENTREZID: ")
65 print(NAs)
66 }
67 #print(genes_ids)
68 # Convert Protein IDs into entrez ids
69
70 # for (i in 1:length(id$UNIPROT)) {
71 # print(i)
72 # if (is.na(id[[2]][i])) {
73 # print(id[[2]][i])
74 # }
75 # }
76 # a = id[which(id$ENTREZID == "NA"),]
77 # print(a)
78 # print(a$UNIPROT)
79 #print(id[[1]][which(is.na(id$ENTREZID))])
80 #print(genes_ids)
81 # for (gene in genes) {
82 # #id = as.character(mget(gene, org.Hs.egALIAS2EG, ifnotfound = NA))
83 # id = select(org.Hs.eg.db, genes, "ENTREZID", "UNIPROT")
84 # print(id)
85 # genes_ids = append(genes_ids, id$ENTREZID)
86 # }
87 #print(genes_ids)
88
89 # Create basic profiles
90 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
91 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
92 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
93 profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
94
95 # Print profile
96 # printProfiles(profile)
97
98 return(c(profile.CC, profile.MF, profile.BP, profile.ALL))
99 }
100
101 # Plot profiles to PNG
102 plotPNG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
103 if (!is.null(profile.CC)) {
104 png("profile.CC.png")
105 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
106 dev.off()
107 }
108 if (!is.null(profile.BP)) {
109 png("profile.BP.png")
110 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
111 dev.off()
112 }
113 if (!is.null(profile.MF)) {
114 png("profile.MF.png")
115 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
116 dev.off()
117 }
118 if (!is.null(profile.ALL)) {
119 png("profile.ALL.png")
120 plotProfiles(profile.ALL, percentage=per, multiplePlots=T, aTitle=title)
121 dev.off()
122 }
123 }
124
125 # Plot profiles to JPEG
126 plotJPEG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
127 if (!is.null(profile.CC)) {
128 jpeg("profile.CC.jpeg")
129 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
130 dev.off()
131 }
132 if (!is.null(profile.BP)) {
133 jpeg("profile.BP.jpeg")
134 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
135 dev.off()
136 }
137 if (!is.null(profile.MF)) {
138 jpeg("profile.MF.jpeg")
139 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
140 dev.off()
141 }
142 if (!is.null(profile.ALL)) {
143 jpeg("profile.ALL.jpeg")
144 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
145 dev.off()
146 }
147 }
148
149 # Plot profiles to PDF
150 plotPDF = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
151 if (!is.null(profile.CC)) {
152 pdf("profile.CC.pdf")
153 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
154 dev.off()
155 }
156 if (!is.null(profile.BP)) {
157 pdf("profile.BP.pdf")
158 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
159 dev.off()
160 }
161 if (!is.null(profile.MF)) {
162 pdf("profile.MF.pdf")
163 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
164 dev.off()
165 }
166 if (!is.null(profile.ALL)) {
167 #print("all")
168 pdf("profile.ALL.pdf")
169 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
170 dev.off()
171 }
172 }
173
174 goprofiles = function() {
175 args = commandArgs(trailingOnly = TRUE)
176 #print(args)
177 # arguments: filename.R inputfile ncol "CC,MF,BP,ALL" "PNG,JPEG,PDF" level "TRUE"(percentage) "Title"
178 if (length(args) != 9) {
179 stop("Not enough/Too many arguments", call. = FALSE)
180 }
181 else {
182 input_type = args[2]
183 if (input_type == "text") {
184 input = strsplit(args[1], "\\s+")[[1]]
185 }
186 else if (input_type == "file") {
187 filename = strsplit(args[1], ",")[[1]][1]
188 ncol = strsplit(args[1], ",")[[1]][2]
189 # Check ncol
190 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
191 stop("Please enter an integer for level")
192 }
193 else {
194 ncol = as.numeric(gsub("c", "", ncol))
195 }
196 header = strsplit(args[1], ",")[[1]][3]
197 # Get file content
198 file = readfile(filename, header)
199 # Extract Protein IDs list
200 input = c()
201 for (row in as.character(file[,ncol])) {
202 input = c(input, strsplit(row, ";")[[1]][1])
203 }
204 }
205 id_type = args[3]
206 ontoopt = strsplit(args[4], ",")[[1]]
207 #print(ontoopt)
208 #plotopt = strsplit(args[3], ",")
209 plotopt = args[5]
210 level = args[6]
211 per = as.logical(args[7])
212 title = args[8]
213 duplicate = args[9]
214
215 profiles = getprofile(input, id_type, level, duplicate)
216 profile.CC = profiles[1]
217 #print(profile.CC)
218 profile.MF = profiles[2]
219 #print(profile.MF)
220 profile.BP = profiles[3]
221 #print(profile.BP)
222 profile.ALL = profiles[-3:-1]
223 #print(profile.ALL)
224 #c(profile.ALL, profile.CC, profile.MF, profile.BP)
225 if ("CC" %in% ontoopt) {
226 if (grepl("PNG", plotopt)) {
227 plotPNG(profile.CC=profile.CC, per=per, title=title)
228 }
229 if (grepl("JPEG", plotopt)) {
230 plotJPEG(profile.CC = profile.CC, per=per, title=title)
231 }
232 if (grepl("PDF", plotopt)) {
233 plotPDF(profile.CC = profile.CC, per=per, title=title)
234 }
235 }
236 if ("MF" %in% ontoopt) {
237 if (grepl("PNG", plotopt)) {
238 plotPNG(profile.MF = profile.MF, per=per, title=title)
239 }
240 if (grepl("JPEG", plotopt)) {
241 plotJPEG(profile.MF = profile.MF, per=per, title=title)
242 }
243 if (grepl("PDF", plotopt)) {
244 plotPDF(profile.MF = profile.MF, per=per, title=title)
245 }
246 }
247 if ("BP" %in% ontoopt) {
248 if (grepl("PNG", plotopt)) {
249 plotPNG(profile.BP = profile.BP, per=per, title=title)
250 }
251 if (grepl("JPEG", plotopt)) {
252 plotJPEG(profile.BP = profile.BP, per=per, title=title)
253 }
254 if (grepl("PDF", plotopt)) {
255 plotPDF(profile.BP = profile.BP, per=per, title=title)
256 }
257 }
258
259 #if (grepl("PNG", plotopt)) {
260 # plotPNG(profile.ALL = profile.ALL, per=per, title=title)
261 #}
262 #if (grepl("JPEG", plotopt)) {
263 # plotJPEG(profile.ALL = profile.ALL, per=per, title=title)
264 #}
265 #if (grepl("PDF", plotopt)) {
266 # plotPDF(profile.ALL = profile.ALL, per=per, title=title)
267 #}
268 }
269
270 }
271
272 goprofiles()
273
274 #Rscript go.R ../proteinGroups_Maud.txt "1" "CC" "PDF" 2 "TRUE" "Title"