annotate Get_ms-ms_observations.R @ 9:14c890ff105a draft

"planemo upload"
author proteore
date Mon, 04 May 2020 03:18:37 -0400
parents 3e9ea4797b00
children ef723082fc54
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
6ab9d2778f04 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff changeset
# Read a tab-separated file and return its content as a data.frame.
#
# path:   file to read (passed to read.csv as its `file` argument).
# header: logical, TRUE when the first line holds the column names.
#
# Returns a data.frame: strings are kept as character vectors and column
# names are kept exactly as written in the file (check.names = FALSE).
# Stops with an error naming the path and the underlying cause when the
# file cannot be read.
read_file <- function(path, header) {
  tryCatch(
    read.csv(path, header = header, sep = "\t", stringsAsFactors = FALSE,
             quote = "\"", check.names = FALSE),
    error = function(e) {
      # Keep the historical "File not found !" prefix, but also report which
      # file failed and why, since a parse error is not a missing file.
      stop("File not found ! (", path, ": ", conditionMessage(e), ")",
           call. = FALSE)
    }
  )
}
6ab9d2778f04 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff changeset
10
6ab9d2778f04 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff changeset
# Convert a textual flag into a logical value.
#
# x: value such as "true", "T", "false" or "f"; matching is case-insensitive
#    and ignores leading/trailing whitespace.
#
# Returns TRUE when x contains "t" or "true", FALSE when it contains "f" or
# "false", and NULL when neither is present (including NULL input).
str2bool <- function(x) {
  flag <- trimws(tolower(x))
  if (any(c("t", "true") %in% flag)) {
    TRUE
  } else if (any(c("f", "false") %in% flag)) {
    FALSE
  } else {
    NULL
  }
}
6ab9d2778f04 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff changeset
20
6
e77c0f3e9bab planemo upload commit 22be35397ac49f1368a77fb1515f2293a41c14b3-dirty
proteore
parents: 2
diff changeset
# Expand one table row into one row per id.
#
# line: a one-row data.frame.
# ncol: index of the column whose cell may hold several ";"-separated ids.
#
# Blanks (spaces and tabs) are removed from the id cell. When the cell holds
# more than one id, the row is repeated once per id with every other column
# copied unchanged; otherwise the row is returned as is, with only the blanks
# removed from the id cell. Column names and column order are preserved.
split_ids_per_line <- function(line, ncol) {
  # `[[` extracts the cell itself, so factors and NA are converted properly.
  cell <- gsub("[[:blank:]]", "", as.character(line[[ncol]]))
  ids <- unlist(strsplit(cell, ";"), use.names = FALSE)

  if (length(ids) <= 1) {
    line[[ncol]] <- cell
    return(line)
  }

  # Repeat the single row once per id, then overwrite the id column. This
  # works the same whether the id column is first, last or in the middle.
  lines <- line[rep(1L, length(ids)), , drop = FALSE]
  lines[[ncol]] <- ids
  rownames(lines) <- NULL
  lines
}
e77c0f3e9bab planemo upload commit 22be35397ac49f1368a77fb1515f2293a41c14b3-dirty
proteore
parents: 2
diff changeset
46
e77c0f3e9bab planemo upload commit 22be35397ac49f1368a77fb1515f2293a41c14b3-dirty
proteore
parents: 2
diff changeset
# Create new lines when a cell of the id column holds more than one id, so
# that the returned table has exactly one id per line.
#
# tab:  data.frame to expand.
# ncol: index of the id column.
#
# Tables with several columns: blanks are removed from the id column, rows
# whose cell holds several ";"-separated ids are repeated once per id (other
# columns copied), and rows with zero or one id are kept as they are.
# Tables with a single column: the ids are split on ";", blanks are removed,
# and empty or NA ids are dropped.
# A table without rows is returned unchanged.
one_id_one_line <- function(tab, ncol) {
  if (nrow(tab) == 0) {
    return(tab)
  }

  if (ncol(tab) > 1) {
    cells <- gsub("[[:blank:]]", "", as.character(tab[[ncol]]))
    pieces <- strsplit(cells, ";")
    n_ids <- lengths(pieces)

    # Cells with at most one id (including "" and NA) keep their whole
    # content and occupy exactly one output row.
    keep_whole <- n_ids <= 1
    pieces[keep_whole] <- as.list(cells[keep_whole])
    n_ids[keep_whole] <- 1L

    # Repeat each row once per id in a single step instead of growing the
    # result with rbind() inside a loop.
    res <- tab[rep(seq_len(nrow(tab)), times = n_ids), , drop = FALSE]
    res[[ncol]] <- unlist(pieces, use.names = FALSE)
    rownames(res) <- NULL
  } else {
    cells <- gsub("[[:blank:]]", "", as.character(tab[[1]]))
    ids <- as.character(unlist(strsplit(cells, ";"), use.names = FALSE))
    # Filter on the vector itself so that the positions tested and the
    # positions kept are the same.
    ids <- ids[!is.na(ids) & ids != ""]
    res <- data.frame(ids, stringsAsFactors = FALSE)
    colnames(res) <- colnames(tab)
  }
  res
}
e77c0f3e9bab planemo upload commit 22be35397ac49f1368a77fb1515f2293a41c14b3-dirty
proteore
parents: 2
diff changeset
64
2
6ab9d2778f04 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff changeset
# Look up the number of observations of each id in one atlas file.
#
# input:      vector of ids, compared with the atlas column "Uniprot_AC".
# atlas_file: path of a tab-separated atlas file with a header line; it must
#             provide the columns "Uniprot_AC" and "nb_obs".
#
# Returns a vector as long as `input` holding, for each id, the "nb_obs"
# value of the first atlas row whose "Uniprot_AC" equals that id, or NA when
# the id is not in the atlas. Stops when a required column is missing.
nb_obs_PeptideAtlas <- function(input, atlas_file) {
  atlas <- read_file(atlas_file, TRUE)

  absent <- setdiff(c("Uniprot_AC", "nb_obs"), colnames(atlas))
  if (length(absent) > 0) {
    stop("Atlas file ", atlas_file, " lacks required column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  atlas[["nb_obs"]][match(input, atlas[["Uniprot_AC"]])]
}
6ab9d2778f04 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff changeset
70
6
e77c0f3e9bab planemo upload commit 22be35397ac49f1368a77fb1515f2293a41c14b3-dirty
proteore
parents: 2
diff changeset
# Build a vector of information from a file path.
#
# path: a single "/"-separated file path.
#
# Returns a character vector of four elements, in this order:
#   1. the last "_"-separated token of the file name (used as "date" by the
#      caller),
#   2. the first two "_"-separated tokens joined by "_" (used as "tissue");
#      only one token is returned when the name has no "_",
#   3. the file name: last path component, cut at its first ".",
#   4. the path as given.
extract_info_from_path <- function(path) {
  base <- tail(strsplit(path, "/")[[1]], n = 1)
  file_name <- strsplit(base, "\\.")[[1]][1]
  tokens <- strsplit(file_name, "_")[[1]]

  date <- tail(tokens, n = 1)
  # head() never pads with NA, unlike tokens[1:2] on a one-token name.
  tissue <- paste(head(tokens, n = 2), collapse = "_")

  c(date, tissue, file_name, path)
}
e77c0f3e9bab planemo upload commit 22be35397ac49f1368a77fb1515f2293a41c14b3-dirty
proteore
parents: 2
diff changeset
78
7
3e9ea4797b00 planemo upload commit 4ba1ebe7b3f5e3fabf78b5fed7ed0b92e2cbf9e5-dirty
proteore
parents: 6
diff changeset
# Normalise a vector of ids.
#
# ids: vector of ids.
#
# Removes every space character from each id, then drops the entries that
# are empty, equal to the string "NA", or missing (NA). Returns the remaining
# ids as a character vector, in their original order.
clean_ids <- function(ids) {
  ids <- gsub(" ", "", ids)
  ids[!is.na(ids) & ids != "" & ids != "NA"]
}
3e9ea4797b00 planemo upload commit 4ba1ebe7b3f5e3fabf78b5fed7ed0b92e2cbf9e5-dirty
proteore
parents: 6
diff changeset
88
2
6ab9d2778f04 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff changeset
# Command-line entry point.
#
# Reads "--name=value" arguments, collects the ids either from an inline list
# (--input_type=list) or from one column of a tab-separated file
# (--input_type=file, with --column and --header), looks each id up in every
# atlas file given by --atlas (comma-separated paths) and writes a
# tab-separated table to --output with one added column per atlas file, named
# after that file.
#
# List input: empty and "NA" ids are dropped and the first output column is
# "Uniprot accession number". File input: the (one id per line) input table
# is written back with the atlas columns appended; rows without a usable id
# are kept and get NA in the atlas columns.
main <- function() {
  args <- commandArgs(TRUE)
  if (length(args) < 1) {
    args <- c("--help")
  }

  # Help section
  if ("--help" %in% args) {
    cat("Selection and Annotation HPA
    Arguments:
        --input_type: type of input (list of id or filename)
        --input: input
        --column: column of the ids, e.g. c1 (file input only)
        --header: true/false, input file has a header (file input only)
        --atlas: list of file(s) path to use
        --output: text output filename \n")
    q(save = "no")
  }

  # Parse arguments: split each one on its FIRST "=" only, so that values
  # containing "=" are kept whole.
  split_arg <- function(arg) {
    arg <- sub("^--", "", arg)
    eq <- regexpr("=", arg, fixed = TRUE)
    if (eq < 0) {
      c(arg, NA_character_)
    } else {
      c(substr(arg, 1, eq - 1), substr(arg, eq + 1, nchar(arg)))
    }
  }
  parsed <- lapply(args, split_arg)
  args <- lapply(parsed, function(pair) pair[2])
  names(args) <- vapply(parsed, function(pair) pair[1], character(1))

  # Fetch a mandatory argument or fail with a message naming it.
  get_arg <- function(name) {
    value <- args[[name]]
    if (is.null(value) || is.na(value)) {
      stop("Missing required argument --", name, call. = FALSE)
    }
    value
  }

  input_type <- get_arg("input_type")
  output <- get_arg("output")
  paths <- strsplit(get_arg("atlas"), ",")[[1]]
  if (length(paths) == 0) {
    stop("No atlas file given in --atlas", call. = FALSE)
  }

  # Extract input
  file_tab <- NULL
  if (input_type == "list") {
    input <- unlist(strsplit(strsplit(get_arg("input"), "[ \t\n]+")[[1]], ";"))
    input <- clean_ids(input)
  } else if (input_type == "file") {
    # Check the column argument ("c1", "c2", ... or a plain number).
    id_col <- suppressWarnings(as.numeric(gsub("c", "", get_arg("column"))))
    if (is.na(id_col) || id_col %% 1 != 0 || id_col < 1) {
      stop("Please enter an integer for column", call. = FALSE)
    }
    header <- str2bool(get_arg("header"))
    if (is.null(header)) {
      stop("--header must be true or false", call. = FALSE)
    }
    file_tab <- read_file(get_arg("input"), header)
    if (id_col > ncol(file_tab)) {
      stop("Column ", id_col, " does not exist in the input file",
           call. = FALSE)
    }
    file_tab <- one_id_one_line(file_tab, id_col) # only one id per line
    input <- vapply(strsplit(as.character(file_tab[[id_col]]), ";"),
                    function(tokens) tokens[1], character(1))
    # Unusable ids become NA instead of being removed, so that `input` keeps
    # one entry per row of `file_tab` and the columns bound below line up.
    input <- gsub(" ", "", input)
    input[input %in% c("", "NA")] <- NA
  } else {
    stop("Unknown --input_type: ", input_type, " (expected list or file)",
         call. = FALSE)
  }

  # Column names of the annotations: third element returned by
  # extract_info_from_path() for each atlas path.
  filenames <- vapply(paths, function(path) extract_info_from_path(path)[3],
                      character(1), USE.NAMES = FALSE)

  # Annotations: one column per atlas file. cbind() over a list always gives
  # a matrix, even for a single id.
  known <- !is.na(input)
  count_obs <- function(path) {
    obs <- rep(NA, length(input))
    obs[known] <- nb_obs_PeptideAtlas(input[known], path)
    obs
  }
  res <- do.call(cbind, lapply(paths, count_obs))
  colnames(res) <- filenames

  # Write output
  if (input_type == "list") {
    res <- data.frame(input, res, check.names = FALSE,
                      stringsAsFactors = FALSE)
    colnames(res)[1] <- "Uniprot accession number"
  } else {
    res <- cbind(file_tab, res)
  }

  # Cells that are empty or a single space are written as NA. This is done
  # column by column so numeric columns are not padded or rounded by a
  # conversion of the whole table to a character matrix.
  res[] <- lapply(res, function(column) {
    if (is.factor(column)) {
      column <- as.character(column)
    }
    if (is.character(column)) {
      column[column %in% c("", " ")] <- NA
    }
    column
  })
  write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE)
}
6ab9d2778f04 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff changeset
157
6ab9d2778f04 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff changeset
# Run the tool.
main()
# Example (--atlas takes the comma-separated paths of the atlas files):
# Rscript Get_ms-ms_observations.R --input_type="file" --input="test-data/FKW_Lacombe_et_al_2017_OK.txt" --column="c1" --header="true" --atlas="Human_Brain_201803_PeptideAtlas.txt,Human_Urine_201803_PeptideAtlas.txt" --output="test-data/PeptideAtlas_output.txt"
6ab9d2778f04 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff changeset
160