Mercurial > repos > proteore > proteore_goprofiles
comparison goprofiles.R @ 1:1236ee08ccb8 draft
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
author | proteore |
---|---|
date | Fri, 16 Feb 2018 03:40:36 -0500 |
parents | d89c09253c8d |
children | 58a8ddd58dde |
comparison
equal
deleted
inserted
replaced
0:d89c09253c8d | 1:1236ee08ccb8 |
---|---|
3 library("goProfiles", quietly=TRUE) | 3 library("goProfiles", quietly=TRUE) |
4 | 4 |
5 # Read file and return file content as data.frame? | 5 # Read file and return file content as data.frame? |
6 readfile = function(filename, header) { | 6 readfile = function(filename, header) { |
7 if (header == "true") { | 7 if (header == "true") { |
8 # Read only the first two lines of the files as data (without headers): | 8 # Read only the first line of the files as data (without headers): |
9 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) | 9 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) |
10 #print("header") | 10 #Read the data of the files (skipping the first row): |
11 #print(headers) | |
12 # Create the headers names with the two (or more) first rows, sappy allows to make operations over the columns (in this case paste) - read more about sapply here : | |
13 #headers_names <- sapply(headers, paste, collapse = "_") | |
14 #print(headers_names) | |
15 #Read the data of the files (skipping the first 2 rows): | |
16 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) | 11 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) |
17 #print(file[1,]) | 12 # Remove empty rows |
13 file <- file[!apply(is.na(file) | file == "", 1, all),] | |
18 #And assign the headers of step two to the data: | 14 #And assign the headers of step two to the data: |
19 names(file) <- headers | 15 names(file) <- headers |
20 } | 16 } |
21 else { | 17 else { |
22 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) | 18 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) |
23 } | 19 } |
24 return(file) | 20 return(file) |
25 } | 21 } |
26 | 22 |
27 #filename = "/Users/LinCun/Documents/ProteoRE/usecase1/Check/HPA.Selection.134.txt" | |
28 #test = readfile(filename) | |
29 #str(test) | |
30 #str(test$Gene.names) | |
31 getprofile = function(ids, id_type, level, duplicate) { | 23 getprofile = function(ids, id_type, level, duplicate) { |
32 #################################################################### | 24 #################################################################### |
33 # Arguments | 25 # Arguments |
34 # - ids: list of input IDs | 26 # - ids: list of input IDs |
35 # - id_type: type of input IDs (UniProt/ENTREZID) | 27 # - id_type: type of input IDs (UniProt/ENTREZID) |
62 # IDs that have NA ENTREZID | 54 # IDs that have NA ENTREZID |
63 NAs = id$UNIPROT[which(is.na(id$ENTREZID))] | 55 NAs = id$UNIPROT[which(is.na(id$ENTREZID))] |
64 print("IDs unable to convert to ENTREZID: ") | 56 print("IDs unable to convert to ENTREZID: ") |
65 print(NAs) | 57 print(NAs) |
66 } | 58 } |
67 #print(genes_ids) | |
68 # Convert Protein IDs into entrez ids | |
69 | |
70 # for (i in 1:length(id$UNIPROT)) { | |
71 # print(i) | |
72 # if (is.na(id[[2]][i])) { | |
73 # print(id[[2]][i]) | |
74 # } | |
75 # } | |
76 # a = id[which(id$ENTREZID == "NA"),] | |
77 # print(a) | |
78 # print(a$UNIPROT) | |
79 #print(id[[1]][which(is.na(id$ENTREZID))]) | |
80 #print(genes_ids) | |
81 # for (gene in genes) { | |
82 # #id = as.character(mget(gene, org.Hs.egALIAS2EG, ifnotfound = NA)) | |
83 # id = select(org.Hs.eg.db, genes, "ENTREZID", "UNIPROT") | |
84 # print(id) | |
85 # genes_ids = append(genes_ids, id$ENTREZID) | |
86 # } | |
87 #print(genes_ids) | |
88 | 59 |
89 # Create basic profiles | 60 # Create basic profiles |
90 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | 61 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) |
91 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | 62 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) |
92 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | 63 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) |
170 dev.off() | 141 dev.off() |
171 } | 142 } |
172 } | 143 } |
173 | 144 |
174 goprofiles = function() { | 145 goprofiles = function() { |
175 args = commandArgs(trailingOnly = TRUE) | 146 args <- commandArgs(TRUE) |
176 #print(args) | 147 if(length(args)<1) { |
177 # arguments: filename.R inputfile ncol "CC,MF,BP,ALL" "PNG,JPEG,PDF" level "TRUE"(percentage) "Title" | 148 args <- c("--help") |
178 if (length(args) != 9) { | 149 } |
179 stop("Not enough/Too many arguments", call. = FALSE) | 150 |
180 } | 151 # Help section |
181 else { | 152 if("--help" %in% args) { |
182 input_type = args[2] | 153 cat("Selection and Annotation HPA |
183 if (input_type == "text") { | 154 Arguments: |
184 input = strsplit(args[1], "\\s+")[[1]] | 155 --input_type: type of input (list of id or filename) |
185 } | 156 --input: input |
186 else if (input_type == "file") { | 157 --ncol: the column number which you would like to apply... |
187 filename = strsplit(args[1], ",")[[1]][1] | 158 --header: true/false if your file contains a header |
188 ncol = strsplit(args[1], ",")[[1]][2] | 159 --id_type: the type of input IDs (UniProt/EntrezID) |
189 # Check ncol | 160 --onto_opt: ontology options |
190 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { | 161 --plot_opt: plot extension options (PDF/JPEG/PNG) |
191 stop("Please enter an integer for level") | 162 --level: 1-3 |
192 } | 163 --per |
193 else { | 164 --title: title of the plot |
194 ncol = as.numeric(gsub("c", "", ncol)) | 165 --duplicate: remove dupliate input IDs (true/false) |
195 } | 166 --text_output: text output filename \n") |
196 header = strsplit(args[1], ",")[[1]][3] | 167 q(save="no") |
197 # Get file content | 168 } |
198 file = readfile(filename, header) | 169 |
199 # Extract Protein IDs list | 170 # Parse arguments |
200 input = c() | 171 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") |
201 for (row in as.character(file[,ncol])) { | 172 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) |
202 input = c(input, strsplit(row, ";")[[1]][1]) | 173 args <- as.list(as.character(argsDF$V2)) |
203 } | 174 names(args) <- argsDF$V1 |
204 } | 175 |
205 id_type = args[3] | 176 input_type = args$input_type |
206 ontoopt = strsplit(args[4], ",")[[1]] | 177 if (input_type == "text") { |
207 #print(ontoopt) | 178 input = strsplit(args$input, " ")[[1]] |
208 #plotopt = strsplit(args[3], ",") | 179 } |
209 plotopt = args[5] | 180 else if (input_type == "file") { |
210 level = args[6] | 181 filename = args$input |
211 per = as.logical(args[7]) | 182 ncol = args$ncol |
212 title = args[8] | 183 # Check ncol |
213 duplicate = args[9] | 184 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { |
214 | 185 stop("Please enter an integer for level") |
215 profiles = getprofile(input, id_type, level, duplicate) | 186 } |
216 profile.CC = profiles[1] | 187 else { |
217 #print(profile.CC) | 188 ncol = as.numeric(gsub("c", "", ncol)) |
218 profile.MF = profiles[2] | 189 } |
219 #print(profile.MF) | 190 header = args$header |
220 profile.BP = profiles[3] | 191 # Get file content |
221 #print(profile.BP) | 192 file = readfile(filename, header) |
222 profile.ALL = profiles[-3:-1] | 193 # Extract Protein IDs list |
223 #print(profile.ALL) | 194 input = c() |
224 #c(profile.ALL, profile.CC, profile.MF, profile.BP) | 195 for (row in as.character(file[,ncol])) { |
225 if ("CC" %in% ontoopt) { | 196 input = c(input, strsplit(row, ";")[[1]][1]) |
226 if (grepl("PNG", plotopt)) { | 197 } |
227 plotPNG(profile.CC=profile.CC, per=per, title=title) | 198 } |
228 } | 199 id_type = args$id_type |
229 if (grepl("JPEG", plotopt)) { | 200 ontoopt = strsplit(args$onto_opt, ",")[[1]] |
230 plotJPEG(profile.CC = profile.CC, per=per, title=title) | 201 #print(ontoopt) |
231 } | 202 #plotopt = strsplit(args[3], ",") |
232 if (grepl("PDF", plotopt)) { | 203 plotopt = args$plot_opt |
233 plotPDF(profile.CC = profile.CC, per=per, title=title) | 204 level = args$level |
234 } | 205 per = as.logical(args$per) |
235 } | 206 title = args$title |
236 if ("MF" %in% ontoopt) { | 207 duplicate = args$duplicate |
237 if (grepl("PNG", plotopt)) { | 208 text_output = args$text_output |
238 plotPNG(profile.MF = profile.MF, per=per, title=title) | 209 |
239 } | 210 profiles = getprofile(input, id_type, level, duplicate) |
240 if (grepl("JPEG", plotopt)) { | 211 profile.CC = profiles[1] |
241 plotJPEG(profile.MF = profile.MF, per=per, title=title) | 212 #print(profile.CC) |
242 } | 213 profile.MF = profiles[2] |
243 if (grepl("PDF", plotopt)) { | 214 #print(profile.MF) |
244 plotPDF(profile.MF = profile.MF, per=per, title=title) | 215 profile.BP = profiles[3] |
245 } | 216 #print(profile.BP) |
246 } | 217 profile.ALL = profiles[-3:-1] |
247 if ("BP" %in% ontoopt) { | 218 #print(profile.ALL) |
248 if (grepl("PNG", plotopt)) { | 219 #c(profile.ALL, profile.CC, profile.MF, profile.BP) |
249 plotPNG(profile.BP = profile.BP, per=per, title=title) | |
250 } | |
251 if (grepl("JPEG", plotopt)) { | |
252 plotJPEG(profile.BP = profile.BP, per=per, title=title) | |
253 } | |
254 if (grepl("PDF", plotopt)) { | |
255 plotPDF(profile.BP = profile.BP, per=per, title=title) | |
256 } | |
257 } | |
258 | 220 |
259 #if (grepl("PNG", plotopt)) { | 221 if ("CC" %in% ontoopt) { |
260 # plotPNG(profile.ALL = profile.ALL, per=per, title=title) | 222 write.table(profile.CC, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) |
261 #} | 223 if (grepl("PNG", plotopt)) { |
262 #if (grepl("JPEG", plotopt)) { | 224 plotPNG(profile.CC=profile.CC, per=per, title=title) |
263 # plotJPEG(profile.ALL = profile.ALL, per=per, title=title) | 225 } |
264 #} | 226 if (grepl("JPEG", plotopt)) { |
265 #if (grepl("PDF", plotopt)) { | 227 plotJPEG(profile.CC = profile.CC, per=per, title=title) |
266 # plotPDF(profile.ALL = profile.ALL, per=per, title=title) | 228 } |
267 #} | 229 if (grepl("PDF", plotopt)) { |
268 } | 230 plotPDF(profile.CC = profile.CC, per=per, title=title) |
269 | 231 } |
232 } | |
233 if ("MF" %in% ontoopt) { | |
234 write.table(profile.MF, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) | |
235 if (grepl("PNG", plotopt)) { | |
236 plotPNG(profile.MF = profile.MF, per=per, title=title) | |
237 } | |
238 if (grepl("JPEG", plotopt)) { | |
239 plotJPEG(profile.MF = profile.MF, per=per, title=title) | |
240 } | |
241 if (grepl("PDF", plotopt)) { | |
242 plotPDF(profile.MF = profile.MF, per=per, title=title) | |
243 } | |
244 } | |
245 if ("BP" %in% ontoopt) { | |
246 write.table(profile.BP, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) | |
247 if (grepl("PNG", plotopt)) { | |
248 plotPNG(profile.BP = profile.BP, per=per, title=title) | |
249 } | |
250 if (grepl("JPEG", plotopt)) { | |
251 plotJPEG(profile.BP = profile.BP, per=per, title=title) | |
252 } | |
253 if (grepl("PDF", plotopt)) { | |
254 plotPDF(profile.BP = profile.BP, per=per, title=title) | |
255 } | |
256 } | |
270 } | 257 } |
271 | 258 |
272 goprofiles() | 259 goprofiles() |
273 | |
274 #Rscript go.R ../proteinGroups_Maud.txt "1" "CC" "PDF" 2 "TRUE" "Title" |