comparison Get_ms-ms_observations.R @ 6:e77c0f3e9bab draft

planemo upload commit 22be35397ac49f1368a77fb1515f2293a41c14b3-dirty
author proteore
date Thu, 02 May 2019 05:08:06 -0400
parents 6ab9d2778f04
children 3e9ea4797b00
comparison
equal deleted inserted replaced
5:034cfb96a482 6:e77c0f3e9bab
16 }else{ 16 }else{
17 return(NULL) 17 return(NULL)
18 } 18 }
19 } 19 }
20 20
# Expand one table row into one row per identifier.
#
# line: a one-row data frame.
# ncol: index of the column holding the ";"-separated identifiers.
#
# Returns a data frame with the same column names as `line`. When the cell
# holds more than one identifier the row is repeated once per identifier, with
# a single id in the identifier column of each copy; otherwise `line` is
# returned with blanks stripped from the identifier cell.
split_ids_per_line <- function(line, ncol) {
  header <- colnames(line)

  # Read the cell with [[ ]] so it is handled as a plain vector. Calling
  # as.character() on a data-frame slice goes through list coercion, which for
  # a factor cell yields its integer code instead of the label.
  cell <- gsub("[[:blank:]]", "", as.character(line[[ncol]]))
  line[[ncol]] <- cell

  # Split once and reuse the result.
  ids <- unlist(strsplit(cell, ";", fixed = TRUE), use.names = FALSE)
  if (length(ids) <= 1) {
    return(line)
  }

  # Repeat the row by index rather than cbind()-ing column slices: this works
  # the same whether the id column is first, last, in the middle or the only
  # column, and needs neither `1:ncol-1` index arithmetic nor
  # suppressWarnings().
  lines <- line[rep(1L, length(ids)), , drop = FALSE]
  lines[[ncol]] <- ids
  rownames(lines) <- NULL
  colnames(lines) <- header
  lines
}
46
# Create new lines when a cell of the id column holds more than one id, so
# that the returned table has exactly one id per line.
#
# tab:  data frame holding the input table.
# ncol: index of the column holding the (possibly ";"-separated) identifiers.
#
# Returns a data frame with the same column names as `tab`:
#   * several columns: every row is expanded by split_ids_per_line() and the
#     pieces are stacked; row names are reset to 1..n.
#   * single column: the ids are split on ";", blanks are stripped, and empty
#     or NA ids are dropped.
one_id_one_line <- function(tab, ncol) {
  # `ncol` the argument shadows the function name, so call base::ncol()
  # explicitly.
  if (base::ncol(tab) > 1) {
    # An empty table has nothing to split; 1:nrow(tab) would have iterated
    # over c(1, 0) here.
    if (nrow(tab) == 0) {
      return(tab)
    }
    tab[, ncol] <- gsub("[[:blank:]]", "", tab[, ncol])

    # Build all pieces first and bind once instead of growing `res` with
    # rbind() on every iteration.
    pieces <- lapply(seq_len(nrow(tab)), function(i) {
      split_ids_per_line(tab[i, , drop = FALSE], ncol)
    })
    res <- do.call(rbind, pieces)
    rownames(res) <- NULL
  } else {
    ids <- gsub("[[:blank:]]", "", as.character(tab[, 1]))
    ids <- unlist(strsplit(ids, ";", fixed = TRUE), use.names = FALSE)

    # Filter on the vector itself. The previous form computed positions on the
    # already-filtered vector and applied them to the unfiltered one, which
    # kept empty ids and dropped trailing ones.
    ids <- ids[!is.na(ids) & ids != ""]
    res <- data.frame(ids, stringsAsFactors = FALSE)
    colnames(res) <- colnames(tab)
  }
  res
}
64
# Look up the number of observations recorded in one PeptideAtlas file for
# each identifier in `input`.
#
# input:      vector of identifiers, matched against the atlas `Uniprot_AC`
#             column.
# atlas_file: path handed to read_file(); the second argument is passed as
#             TRUE, which the other call site in this file uses as the header
#             flag.
#
# Returns atlas$nb_obs aligned with `input`. Identifiers absent from the atlas
# give NA. match() keeps only the first matching row, so values are looked up,
# not summed, if an accession appears more than once in the atlas.
nb_obs_PeptideAtlas <- function(input, atlas_file) {
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  atlas <- read_file(atlas_file, TRUE)
  atlas$nb_obs[match(input, atlas$Uniprot_AC)]
}
70
# Build a vector of information from a file path.
#
# path: a single file path whose file name looks like
#       "<part1>_<part2>_..._<date>.<ext>".
#
# Returns a character vector of length 4: c(date, tissue, file_name, path)
#   date      - last "_"-separated token of the file name
#   tissue    - first two "_"-separated tokens joined with "_"
#   file_name - last path component, cut at its first "."
#   path      - the input, unchanged
extract_info_from_path <- function(path) {
  file_name <- strsplit(basename(path), ".", fixed = TRUE)[[1]][1]

  # Split the name once and reuse the tokens for both fields.
  tokens <- strsplit(file_name, "_", fixed = TRUE)[[1]]
  date <- tail(tokens, n = 1)

  # head() instead of tokens[1:2]: indexing past the end pads with NA, which
  # paste() turned into a literal "NA" for names with a single token.
  tissue <- paste(head(tokens, n = 2), collapse = "_")

  c(date, tissue, file_name, path)
}
26 78
27 main = function() { 79 main = function() {
28 args <- commandArgs(TRUE) 80 args <- commandArgs(TRUE)
29 if(length(args)<1) { 81 if(length(args)<1) {
45 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") 97 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
46 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) 98 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
47 args <- as.list(as.character(argsDF$V2)) 99 args <- as.list(as.character(argsDF$V2))
48 names(args) <- argsDF$V1 100 names(args) <- argsDF$V1
49 101
50 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") 102 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/Get_ms-ms_observations/args.Rda")
51 #load("/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") 103 #load("/home/dchristiany/proteore_project/ProteoRE/tools/Get_ms-ms_observations/args.Rda")
52 104
53 # Extract input 105 # Extract input
54 input_type = args$input_type 106 input_type = args$input_type
55 if (input_type == "list") { 107 if (input_type == "list") {
56 input = strsplit(args$input, "[ \t\n]+")[[1]] 108 input = strsplit(args$input, "[ \t\n]+")[[1]]
63 } else { 115 } else {
64 ncol = as.numeric(gsub("c", "", ncol)) 116 ncol = as.numeric(gsub("c", "", ncol))
65 } 117 }
66 header = str2bool(args$header) 118 header = str2bool(args$header)
67 file = read_file(filename, header) 119 file = read_file(filename, header)
120 file = one_id_one_line(file,ncol) #only one id per line
68 input = sapply(file[,ncol],function(x) strsplit(as.character(x),";")[[1]][1],USE.NAMES = F) 121 input = sapply(file[,ncol],function(x) strsplit(as.character(x),";")[[1]][1],USE.NAMES = F)
69 } 122 }
70 123
71 output = args$output 124 output = args$output
72
73 #function to create a list of infos from file path
74 extract_info_from_path <- function(path) {
75 file_name=strsplit(tail(strsplit(path,"/")[[1]],n=1),"\\.")[[1]][1]
76 date=tail(strsplit(file_name,"_")[[1]],n=1)
77 tissue=paste(strsplit(file_name,"_")[[1]][1:2],collapse="_")
78 return (c(date,tissue,file_name,path))
79 }
80 125
81 #data_frame building 126 #data_frame building
82 paths=strsplit(args$atlas,",")[[1]] 127 paths=strsplit(args$atlas,",")[[1]]
83 tmp <- sapply(paths, extract_info_from_path,USE.NAMES = FALSE) 128 tmp <- sapply(paths, extract_info_from_path,USE.NAMES = FALSE)
84 df <- as.data.frame(t(as.data.frame(tmp)),row.names = c(""),stringsAsFactors = FALSE) 129 df <- as.data.frame(t(as.data.frame(tmp)),row.names = c(""),stringsAsFactors = FALSE)
85 names(df) <- c("date","tissue","filename","path") 130 names(df) <- c("date","tissue","filename","path")
86 131
87 # Annotations 132 # Annotations
88 res = sapply(df$path, function(x) nb_obs_PeptideAtlas(input, x), USE.NAMES = FALSE) 133 res = sapply(df$path, function(x) nb_obs_PeptideAtlas(input, x), USE.NAMES = FALSE)
89
90 colnames(res)=df$filename 134 colnames(res)=df$filename
91 135
92 # Write output 136 # Write output
93 if (input_type == "list") { 137 if (input_type == "list") {
94 res = cbind(as.matrix(input), res) 138 res = cbind(as.matrix(input), res)