Mercurial > repos > proteore > proteore_ms_observation_pepatlas
comparison Get_ms-ms_observations.R @ 6:e77c0f3e9bab draft
planemo upload commit 22be35397ac49f1368a77fb1515f2293a41c14b3-dirty
| author | proteore |
|---|---|
| date | Thu, 02 May 2019 05:08:06 -0400 |
| parents | 6ab9d2778f04 |
| children | 3e9ea4797b00 |
Comparison legend (row states in the diff below):
- equal
- deleted
- inserted
- replaced
| 5:034cfb96a482 | 6:e77c0f3e9bab |
|---|---|
| 16 }else{ | 16 }else{ |
| 17 return(NULL) | 17 return(NULL) |
| 18 } | 18 } |
| 19 } | 19 } |
| 20 | 20 |
| 21 #take data frame, return data frame | |
| 22 split_ids_per_line <- function(line,ncol){ | |
| 23 | |
| 24 #print (line) | |
| 25 header = colnames(line) | |
| 26 line[ncol] = gsub("[[:blank:]]","",line[ncol]) | |
| 27 | |
| 28 if (length(unlist(strsplit(as.character(line[ncol]),";")))>1) { | |
| 29 if (length(line)==1 ) { | |
| 30 lines = as.data.frame(unlist(strsplit(as.character(line[ncol]),";")),stringsAsFactors = F) | |
| 31 } else { | |
| 32 if (ncol==1) { #first column | |
| 33 lines = suppressWarnings(cbind(unlist(strsplit(as.character(line[ncol]),";")), line[2:length(line)])) | |
| 34 } else if (ncol==length(line)) { #last column | |
| 35 lines = suppressWarnings(cbind(line[1:ncol-1],unlist(strsplit(as.character(line[ncol]),";")))) | |
| 36 } else { | |
| 37 lines = suppressWarnings(cbind(line[1:ncol-1], unlist(strsplit(as.character(line[ncol]),";"),use.names = F), line[(ncol+1):length(line)])) | |
| 38 } | |
| 39 } | |
| 40 colnames(lines)=header | |
| 41 return(lines) | |
| 42 } else { | |
| 43 return(line) | |
| 44 } | |
| 45 } | |
| 46 | |
| 47 #create new lines if there's more than one id per cell in the column in order to have only one id per line | |
| 48 one_id_one_line <-function(tab,ncol){ | |
| 49 if (ncol(tab)>1){ | |
| 50 tab[,ncol] = sapply(tab[,ncol],function(x) gsub("[[:blank:]]","",x)) | |
| 51 header=colnames(tab) | |
| 52 res=as.data.frame(matrix(ncol=ncol(tab),nrow=0)) | |
| 53 for (i in 1:nrow(tab) ) { | |
| 54 lines = split_ids_per_line(tab[i,],ncol) | |
| 55 res = rbind(res,lines) | |
| 56 } | |
| 57 }else { | |
| 58 res = unlist(sapply(tab[,1],function(x) strsplit(x,";")),use.names = F) | |
| 59 res = data.frame(res[which(!is.na(res[res!=""]))],stringsAsFactors = F) | |
| 60 colnames(res)=colnames(tab) | |
| 61 } | |
| 62 return(res) | |
| 63 } | |
| 64 | |
| 21 nb_obs_PeptideAtlas <- function(input, atlas_file) { | 65 nb_obs_PeptideAtlas <- function(input, atlas_file) { |
| 22 ## Calculate the sum of n_observations for each ID in input | 66 ## Calculate the sum of n_observations for each ID in input |
| 23 atlas = read_file(atlas_file, T) | 67 atlas = read_file(atlas_file, T) |
| 24 return(atlas$nb_obs[match(input,atlas$Uniprot_AC)]) | 68 return(atlas$nb_obs[match(input,atlas$Uniprot_AC)]) |
| 69 } | |
| 70 | |
| 71 #function to create a list of infos from file path | |
| 72 extract_info_from_path <- function(path) { | |
| 73 file_name=strsplit(tail(strsplit(path,"/")[[1]],n=1),"\\.")[[1]][1] | |
| 74 date=tail(strsplit(file_name,"_")[[1]],n=1) | |
| 75 tissue=paste(strsplit(file_name,"_")[[1]][1:2],collapse="_") | |
| 76 return (c(date,tissue,file_name,path)) | |
| 25 } | 77 } |
| 26 | 78 |
| 27 main = function() { | 79 main = function() { |
| 28 args <- commandArgs(TRUE) | 80 args <- commandArgs(TRUE) |
| 29 if(length(args)<1) { | 81 if(length(args)<1) { |
| 45 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | 97 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") |
| 46 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | 98 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) |
| 47 args <- as.list(as.character(argsDF$V2)) | 99 args <- as.list(as.character(argsDF$V2)) |
| 48 names(args) <- argsDF$V1 | 100 names(args) <- argsDF$V1 |
| 49 | 101 |
| 50 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") | 102 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/Get_ms-ms_observations/args.Rda") |
| 51 #load("/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") | 103 #load("/home/dchristiany/proteore_project/ProteoRE/tools/Get_ms-ms_observations/args.Rda") |
| 52 | 104 |
| 53 # Extract input | 105 # Extract input |
| 54 input_type = args$input_type | 106 input_type = args$input_type |
| 55 if (input_type == "list") { | 107 if (input_type == "list") { |
| 56 input = strsplit(args$input, "[ \t\n]+")[[1]] | 108 input = strsplit(args$input, "[ \t\n]+")[[1]] |
| 63 } else { | 115 } else { |
| 64 ncol = as.numeric(gsub("c", "", ncol)) | 116 ncol = as.numeric(gsub("c", "", ncol)) |
| 65 } | 117 } |
| 66 header = str2bool(args$header) | 118 header = str2bool(args$header) |
| 67 file = read_file(filename, header) | 119 file = read_file(filename, header) |
| 120 file = one_id_one_line(file,ncol) #only one id per line | |
| 68 input = sapply(file[,ncol],function(x) strsplit(as.character(x),";")[[1]][1],USE.NAMES = F) | 121 input = sapply(file[,ncol],function(x) strsplit(as.character(x),";")[[1]][1],USE.NAMES = F) |
| 69 } | 122 } |
| 70 | 123 |
| 71 output = args$output | 124 output = args$output |
| 72 | |
| 73 #function to create a list of infos from file path | |
| 74 extract_info_from_path <- function(path) { | |
| 75 file_name=strsplit(tail(strsplit(path,"/")[[1]],n=1),"\\.")[[1]][1] | |
| 76 date=tail(strsplit(file_name,"_")[[1]],n=1) | |
| 77 tissue=paste(strsplit(file_name,"_")[[1]][1:2],collapse="_") | |
| 78 return (c(date,tissue,file_name,path)) | |
| 79 } | |
| 80 | 125 |
| 81 #data_frame building | 126 #data_frame building |
| 82 paths=strsplit(args$atlas,",")[[1]] | 127 paths=strsplit(args$atlas,",")[[1]] |
| 83 tmp <- sapply(paths, extract_info_from_path,USE.NAMES = FALSE) | 128 tmp <- sapply(paths, extract_info_from_path,USE.NAMES = FALSE) |
| 84 df <- as.data.frame(t(as.data.frame(tmp)),row.names = c(""),stringsAsFactors = FALSE) | 129 df <- as.data.frame(t(as.data.frame(tmp)),row.names = c(""),stringsAsFactors = FALSE) |
| 85 names(df) <- c("date","tissue","filename","path") | 130 names(df) <- c("date","tissue","filename","path") |
| 86 | 131 |
| 87 # Annotations | 132 # Annotations |
| 88 res = sapply(df$path, function(x) nb_obs_PeptideAtlas(input, x), USE.NAMES = FALSE) | 133 res = sapply(df$path, function(x) nb_obs_PeptideAtlas(input, x), USE.NAMES = FALSE) |
| 89 | |
| 90 colnames(res)=df$filename | 134 colnames(res)=df$filename |
| 91 | 135 |
| 92 # Write output | 136 # Write output |
| 93 if (input_type == "list") { | 137 if (input_type == "list") { |
| 94 res = cbind(as.matrix(input), res) | 138 res = cbind(as.matrix(input), res) |
