Mercurial > repos > proteore > proteore_goprofiles
comparison goprofiles.R @ 6:6afe8166a9a4 draft
planemo upload commit 4e898239997b7ef266b1d0ce0a4c7cdec31b4ecd-dirty
author | proteore |
---|---|
date | Fri, 21 Sep 2018 09:23:38 -0400 |
parents | 781072a65600 |
children | 3e138d54c105 |
comparison
equal
deleted
inserted
replaced
5:781072a65600 | 6:6afe8166a9a4 |
---|---|
1 options(warn=-1) #TURN OFF WARNINGS !!!!!! | |
2 | |
1 # Load necessary libraries | 3 # Load necessary libraries |
2 library(goProfiles,quietly = TRUE) | 4 suppressMessages(library(goProfiles,quietly = TRUE)) |
3 | 5 |
4 # Read file and return file content as data.frame | 6 # Read file and return file content as data.frame |
5 readfile = function(filename, header) { | 7 readfile = function(filename, header) { |
6 if (header == "true") { | 8 if (header == "true") { |
7 # Read only first line of the file as header: | 9 # Read only first line of the file as header: |
21 return(file) | 23 return(file) |
22 } | 24 } |
23 | 25 |
24 check_ids <- function(vector,type) { | 26 check_ids <- function(vector,type) { |
25 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" | 27 uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" |
26 entrez_id = "^'[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" | 28 entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" |
27 if (type == "Entrez"){ | 29 if (type == "Entrez"){ |
28 return(grepl(entrez_id,vector)) | 30 return(grepl(entrez_id,vector)) |
29 } else if (type == "UniProt") { | 31 } else if (type == "UniProt") { |
30 return(grepl(uniprot_pattern,vector)) | 32 return(grepl(uniprot_pattern,vector)) |
31 } | 33 } |
212 } | 214 } |
213 header = args$header | 215 header = args$header |
214 # Get file content | 216 # Get file content |
215 file = readfile(filename, header) | 217 file = readfile(filename, header) |
216 # Extract Protein IDs list | 218 # Extract Protein IDs list |
217 input = c() | 219 input = unlist(strsplit(file[,ncol],";")) |
218 for (row in as.character(file[,ncol])) { | 220 input = input [which(!is.na(input))] |
219 input = c(input, strsplit(row, ";")[[1]][1]) | |
220 } | |
221 } | 221 } |
222 | 222 |
223 if (! any(check_ids(input,id_type))){ | 223 if (! any(check_ids(input,id_type))){ |
224 stop(paste(id_type,"not found in your ids list, please check your IDs in input or the selected column of your input file")) | 224 stop(paste(id_type,"not found in your ids list, please check your IDs in input or the selected column of your input file")) |
225 } | 225 } |