Mercurial > repos > proteore > proteore_goprofiles
comparison goprofiles.R @ 0:d89c09253c8d draft
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
author | proteore |
---|---|
date | Sun, 26 Nov 2017 19:19:39 -0500 |
parents | |
children | 1236ee08ccb8 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d89c09253c8d |
---|---|
1 # Load necessary libraries | |
2 library("org.Hs.eg.db", quietly=TRUE) | |
3 library("goProfiles", quietly=TRUE) | |
4 | |
5 # Read file and return file content as data.frame? | |
6 readfile = function(filename, header) { | |
7 if (header == "true") { | |
8 # Read only the first two lines of the files as data (without headers): | |
9 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) | |
10 #print("header") | |
11 #print(headers) | |
12 # Create the headers names with the two (or more) first rows, sappy allows to make operations over the columns (in this case paste) - read more about sapply here : | |
13 #headers_names <- sapply(headers, paste, collapse = "_") | |
14 #print(headers_names) | |
15 #Read the data of the files (skipping the first 2 rows): | |
16 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) | |
17 #print(file[1,]) | |
18 #And assign the headers of step two to the data: | |
19 names(file) <- headers | |
20 } | |
21 else { | |
22 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) | |
23 } | |
24 return(file) | |
25 } | |
26 | |
27 #filename = "/Users/LinCun/Documents/ProteoRE/usecase1/Check/HPA.Selection.134.txt" | |
28 #test = readfile(filename) | |
29 #str(test) | |
30 #str(test$Gene.names) | |
31 getprofile = function(ids, id_type, level, duplicate) { | |
32 #################################################################### | |
33 # Arguments | |
34 # - ids: list of input IDs | |
35 # - id_type: type of input IDs (UniProt/ENTREZID) | |
36 # - level | |
37 # - duplicate: if the duplicated IDs should be removed or not (TRUE/FALSE) | |
38 #################################################################### | |
39 | |
40 # Check if level is number | |
41 if (! as.numeric(level) %% 1 == 0) { | |
42 stop("Please enter an integer for level") | |
43 } | |
44 else { | |
45 level = as.numeric(level) | |
46 } | |
47 #genes = as.vector(file[,ncol]) | |
48 | |
49 # Extract Gene Entrez ID | |
50 if (id_type == "Entrez") { | |
51 id = select(org.Hs.eg.db, ids, "ENTREZID", multiVals = "first") | |
52 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] | |
53 } | |
54 else { | |
55 genes_ids = c() | |
56 id = select(org.Hs.eg.db, ids, "ENTREZID", "UNIPROT", multiVals = "first") | |
57 if (duplicate == "TRUE") { | |
58 id = unique(id) | |
59 } | |
60 print(id[[1]]) | |
61 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] | |
62 # IDs that have NA ENTREZID | |
63 NAs = id$UNIPROT[which(is.na(id$ENTREZID))] | |
64 print("IDs unable to convert to ENTREZID: ") | |
65 print(NAs) | |
66 } | |
67 #print(genes_ids) | |
68 # Convert Protein IDs into entrez ids | |
69 | |
70 # for (i in 1:length(id$UNIPROT)) { | |
71 # print(i) | |
72 # if (is.na(id[[2]][i])) { | |
73 # print(id[[2]][i]) | |
74 # } | |
75 # } | |
76 # a = id[which(id$ENTREZID == "NA"),] | |
77 # print(a) | |
78 # print(a$UNIPROT) | |
79 #print(id[[1]][which(is.na(id$ENTREZID))]) | |
80 #print(genes_ids) | |
81 # for (gene in genes) { | |
82 # #id = as.character(mget(gene, org.Hs.egALIAS2EG, ifnotfound = NA)) | |
83 # id = select(org.Hs.eg.db, genes, "ENTREZID", "UNIPROT") | |
84 # print(id) | |
85 # genes_ids = append(genes_ids, id$ENTREZID) | |
86 # } | |
87 #print(genes_ids) | |
88 | |
89 # Create basic profiles | |
90 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | |
91 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | |
92 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | |
93 profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | |
94 | |
95 # Print profile | |
96 # printProfiles(profile) | |
97 | |
98 return(c(profile.CC, profile.MF, profile.BP, profile.ALL)) | |
99 } | |
100 | |
101 # Plot profiles to PNG | |
102 plotPNG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { | |
103 if (!is.null(profile.CC)) { | |
104 png("profile.CC.png") | |
105 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) | |
106 dev.off() | |
107 } | |
108 if (!is.null(profile.BP)) { | |
109 png("profile.BP.png") | |
110 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) | |
111 dev.off() | |
112 } | |
113 if (!is.null(profile.MF)) { | |
114 png("profile.MF.png") | |
115 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) | |
116 dev.off() | |
117 } | |
118 if (!is.null(profile.ALL)) { | |
119 png("profile.ALL.png") | |
120 plotProfiles(profile.ALL, percentage=per, multiplePlots=T, aTitle=title) | |
121 dev.off() | |
122 } | |
123 } | |
124 | |
125 # Plot profiles to JPEG | |
126 plotJPEG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { | |
127 if (!is.null(profile.CC)) { | |
128 jpeg("profile.CC.jpeg") | |
129 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) | |
130 dev.off() | |
131 } | |
132 if (!is.null(profile.BP)) { | |
133 jpeg("profile.BP.jpeg") | |
134 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) | |
135 dev.off() | |
136 } | |
137 if (!is.null(profile.MF)) { | |
138 jpeg("profile.MF.jpeg") | |
139 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) | |
140 dev.off() | |
141 } | |
142 if (!is.null(profile.ALL)) { | |
143 jpeg("profile.ALL.jpeg") | |
144 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title) | |
145 dev.off() | |
146 } | |
147 } | |
148 | |
149 # Plot profiles to PDF | |
150 plotPDF = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { | |
151 if (!is.null(profile.CC)) { | |
152 pdf("profile.CC.pdf") | |
153 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) | |
154 dev.off() | |
155 } | |
156 if (!is.null(profile.BP)) { | |
157 pdf("profile.BP.pdf") | |
158 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) | |
159 dev.off() | |
160 } | |
161 if (!is.null(profile.MF)) { | |
162 pdf("profile.MF.pdf") | |
163 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) | |
164 dev.off() | |
165 } | |
166 if (!is.null(profile.ALL)) { | |
167 #print("all") | |
168 pdf("profile.ALL.pdf") | |
169 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title) | |
170 dev.off() | |
171 } | |
172 } | |
173 | |
174 goprofiles = function() { | |
175 args = commandArgs(trailingOnly = TRUE) | |
176 #print(args) | |
177 # arguments: filename.R inputfile ncol "CC,MF,BP,ALL" "PNG,JPEG,PDF" level "TRUE"(percentage) "Title" | |
178 if (length(args) != 9) { | |
179 stop("Not enough/Too many arguments", call. = FALSE) | |
180 } | |
181 else { | |
182 input_type = args[2] | |
183 if (input_type == "text") { | |
184 input = strsplit(args[1], "\\s+")[[1]] | |
185 } | |
186 else if (input_type == "file") { | |
187 filename = strsplit(args[1], ",")[[1]][1] | |
188 ncol = strsplit(args[1], ",")[[1]][2] | |
189 # Check ncol | |
190 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { | |
191 stop("Please enter an integer for level") | |
192 } | |
193 else { | |
194 ncol = as.numeric(gsub("c", "", ncol)) | |
195 } | |
196 header = strsplit(args[1], ",")[[1]][3] | |
197 # Get file content | |
198 file = readfile(filename, header) | |
199 # Extract Protein IDs list | |
200 input = c() | |
201 for (row in as.character(file[,ncol])) { | |
202 input = c(input, strsplit(row, ";")[[1]][1]) | |
203 } | |
204 } | |
205 id_type = args[3] | |
206 ontoopt = strsplit(args[4], ",")[[1]] | |
207 #print(ontoopt) | |
208 #plotopt = strsplit(args[3], ",") | |
209 plotopt = args[5] | |
210 level = args[6] | |
211 per = as.logical(args[7]) | |
212 title = args[8] | |
213 duplicate = args[9] | |
214 | |
215 profiles = getprofile(input, id_type, level, duplicate) | |
216 profile.CC = profiles[1] | |
217 #print(profile.CC) | |
218 profile.MF = profiles[2] | |
219 #print(profile.MF) | |
220 profile.BP = profiles[3] | |
221 #print(profile.BP) | |
222 profile.ALL = profiles[-3:-1] | |
223 #print(profile.ALL) | |
224 #c(profile.ALL, profile.CC, profile.MF, profile.BP) | |
225 if ("CC" %in% ontoopt) { | |
226 if (grepl("PNG", plotopt)) { | |
227 plotPNG(profile.CC=profile.CC, per=per, title=title) | |
228 } | |
229 if (grepl("JPEG", plotopt)) { | |
230 plotJPEG(profile.CC = profile.CC, per=per, title=title) | |
231 } | |
232 if (grepl("PDF", plotopt)) { | |
233 plotPDF(profile.CC = profile.CC, per=per, title=title) | |
234 } | |
235 } | |
236 if ("MF" %in% ontoopt) { | |
237 if (grepl("PNG", plotopt)) { | |
238 plotPNG(profile.MF = profile.MF, per=per, title=title) | |
239 } | |
240 if (grepl("JPEG", plotopt)) { | |
241 plotJPEG(profile.MF = profile.MF, per=per, title=title) | |
242 } | |
243 if (grepl("PDF", plotopt)) { | |
244 plotPDF(profile.MF = profile.MF, per=per, title=title) | |
245 } | |
246 } | |
247 if ("BP" %in% ontoopt) { | |
248 if (grepl("PNG", plotopt)) { | |
249 plotPNG(profile.BP = profile.BP, per=per, title=title) | |
250 } | |
251 if (grepl("JPEG", plotopt)) { | |
252 plotJPEG(profile.BP = profile.BP, per=per, title=title) | |
253 } | |
254 if (grepl("PDF", plotopt)) { | |
255 plotPDF(profile.BP = profile.BP, per=per, title=title) | |
256 } | |
257 } | |
258 | |
259 #if (grepl("PNG", plotopt)) { | |
260 # plotPNG(profile.ALL = profile.ALL, per=per, title=title) | |
261 #} | |
262 #if (grepl("JPEG", plotopt)) { | |
263 # plotJPEG(profile.ALL = profile.ALL, per=per, title=title) | |
264 #} | |
265 #if (grepl("PDF", plotopt)) { | |
266 # plotPDF(profile.ALL = profile.ALL, per=per, title=title) | |
267 #} | |
268 } | |
269 | |
270 } | |
271 | |
272 goprofiles() | |
273 | |
274 #Rscript go.R ../proteinGroups_Maud.txt "1" "CC" "PDF" 2 "TRUE" "Title" |