# HG changeset patch # User proteore # Date 1545144650 18000 # Node ID 6ab9d2778f04e3e83d310f33e7016a1861705e6b # Parent 23671dd35026c5d31cac100df5dc5023e78f33f5 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b diff -r 23671dd35026 -r 6ab9d2778f04 Get_ms-ms_observations.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Get_ms-ms_observations.R Tue Dec 18 09:50:50 2018 -0500 @@ -0,0 +1,106 @@ +# Read file and return file content as data.frame +read_file <- function(path,header){ + file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE) + if (inherits(file,"try-error")){ + stop("File not found !") + }else{ + return(file) + } +} + +str2bool <- function(x){ + if (any(is.element(c("t","true"),tolower(x)))){ + return (TRUE) + }else if (any(is.element(c("f","false"),tolower(x)))){ + return (FALSE) + }else{ + return(NULL) + } +} + +nb_obs_PeptideAtlas <- function(input, atlas_file) { + ## Calculate the sum of n_observations for each ID in input + atlas = read_file(atlas_file, T) + return(atlas$nb_obs[match(input,atlas$Uniprot_AC)]) +} + +main = function() { + args <- commandArgs(TRUE) + if(length(args)<1) { + args <- c("--help") + } + + # Help section + if("--help" %in% args) { + cat("Selection and Annotation HPA + Arguments: + --input_type: type of input (list of id or filename) + --input: input + --atlas: list of file(s) path to use + --output: text output filename \n") + q(save="no") + } + + # Parse arguments + parseArgs <- function(x) strsplit(sub("^--", "", x), "=") + argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) + args <- as.list(as.character(argsDF$V2)) + names(args) <- argsDF$V1 + + #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") + #load("/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") + + # Extract input + input_type = args$input_type + if (input_type == "list") { + input = strsplit(args$input, "[ \t\n]+")[[1]] + } else if (input_type == "file") { + filename = args$input + ncol = args$column + # Check ncol + if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { + stop("Please enter an integer for level") + } else { + ncol = as.numeric(gsub("c", "", ncol)) + } + header = str2bool(args$header) + file = read_file(filename, header) + input = sapply(file[,ncol],function(x) strsplit(as.character(x),";")[[1]][1],USE.NAMES = F) + } + + output = args$output + + #function to create a list of infos from file path + extract_info_from_path <- function(path) { + file_name=strsplit(tail(strsplit(path,"/")[[1]],n=1),"\\.")[[1]][1] + date=tail(strsplit(file_name,"_")[[1]],n=1) + tissue=paste(strsplit(file_name,"_")[[1]][1:2],collapse="_") + return (c(date,tissue,file_name,path)) + } + + #data_frame building + paths=strsplit(args$atlas,",")[[1]] + tmp <- sapply(paths, extract_info_from_path,USE.NAMES = FALSE) + df <- as.data.frame(t(as.data.frame(tmp)),row.names = c(""),stringsAsFactors = FALSE) + names(df) <- c("date","tissue","filename","path") + + # Annotations + res = sapply(df$path, function(x) nb_obs_PeptideAtlas(input, x), USE.NAMES = FALSE) + + colnames(res)=df$filename + + # Write output + if (input_type == "list") { + res = cbind(as.matrix(input), res) + colnames(res)[1] = "Uniprot accession number" + } else if (input_type == "file") { + res = cbind(file, res) + } + res = as.data.frame(apply(res, c(1,2), function(x) gsub("^$|^ $", NA, x))) + write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE) + +} + +main() +#Rscript retrieve_peptideatlas.R --input_type="file" --input="test-data/FKW_Lacombe_et_al_2017_OK.txt" --atlas_brain="Human_Brain_201803_PeptideAtlas.txt" --column="c1" --header="true" --output="test-data/PeptideAtlas_output.txt" --atlas_urine="Human_Urine_201803_PeptideAtlas.txt" --atlas="brain,urine" + diff -r 23671dd35026 -r 6ab9d2778f04 Get_ms-ms_observations.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Get_ms-ms_observations.xml Tue Dec 18 09:50:50 2018 -0500 @@ -0,0 +1,128 @@ + + [Peptide Atlas] + + R + + + + + + $__tool_directory__/Get_ms-ms_observations.R + --input_type="$input.ids" + #if $input.ids == "list" + --input="$input.list" + #else + --input="$input.file" + --column_number="$input.ncol" + --header="$input.header" + #end if + --atlas=${",".join([$__tool_data_path__+"/"+str(ref) for ref in str($atlas).split(",")])} + --output="$output" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 23671dd35026 -r 6ab9d2778f04 peptide_atlas.loc.sample --- a/peptide_atlas.loc.sample Wed Sep 19 05:24:38 2018 -0400 +++ b/peptide_atlas.loc.sample Tue Dec 18 09:50:50 2018 -0500 @@ -3,3 +3,10 @@ #This is a tab separated file (TAB, not 4 spaces !) # # +Human_Brain Human Brain 25/07/2018 peptide_atlas/Human_Brain_25-07-2018.tsv +Human_CSF Human CSF 25/07/2018 peptide_atlas/Human_CSF_25-07-2018.tsv +Human_Heart Human Heart 25/07/2018 peptide_atlas/Human_Heart_25-07-2018.tsv +Human_Kidney Human Kidney 25/07/2018 peptide_atlas/Human_Kidney_25-07-2018.tsv +Human_Liver Human Liver 25/07/2018 peptide_atlas/Human_Liver_25-07-2018.tsv +Human_Plasma Human Plasma 25/07/2018 peptide_atlas/Human_Plasma_25-07-2018.tsv +Human_Urine Human Urine 25/07/2018 peptide_atlas/Human_Urine_25-07-2018.tsv diff -r 23671dd35026 -r 6ab9d2778f04 proteore_ms_observation_pepatlas.R --- a/proteore_ms_observation_pepatlas.R Wed Sep 19 05:24:38 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,122 +0,0 @@ -# Read file and return file content as data.frame -readfile <- function(filename, header) { - if (header == "true") { - # Read only first line of the file as header: - headers <- try(read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = ""),silent=TRUE) - if (!inherits(headers, 'try-error')){ - file - } else { - stop("Your file seems to be empty, 'number of MS/MS observations in a tissue' tool stopped !") - } - #Read the data of the files (skipping the first row) - file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") - # Remove empty rows - file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] - #And assign the header to the data - names(file) <- headers - } - else { - file <- try(read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = ""),silent=TRUE) - if (!inherits(file, 'try-error')){ - file - } else { - stop("Your file seems to be empty, 'number of MS/MS observations in a tissue' tool stopped !") - } - # Remove empty rows - file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] - } - return(file) -} - -nb_obs_PeptideAtlas <- function(input, atlas_file) { - ## Calculate the sum of n_observations for each ID in input - atlas = readfile(atlas_file, "true") - return(atlas$nb_obs[match(input,atlas$Uniprot_AC)]) -} - -main = function() { - args <- commandArgs(TRUE) - if(length(args)<1) { - args <- c("--help") - } - - # Help section - if("--help" %in% args) { - cat("Selection and Annotation HPA - Arguments: - --input_type: type of input (list of id or filename) - --input: input - --atlas: list of file(s) path to use - --output: text output filename \n") - q(save="no") - } - - # Parse arguments - parseArgs <- function(x) strsplit(sub("^--", "", x), "=") - argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) - args <- as.list(as.character(argsDF$V2)) - names(args) <- argsDF$V1 - - #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") - #load("/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") - - # Extract input - input_type = args$input_type - if (input_type == "list") { - input = strsplit(args$input, "[ \t\n]+")[[1]] - } else if (input_type == "file") { - filename = args$input - ncol = args$column - # Check ncol - if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { - stop("Please enter an integer for level") - } else { - ncol = as.numeric(gsub("c", "", ncol)) - } - header = args$header - # Get file content - file = readfile(filename, header) - # Extract Protein IDs list - input = c() - for (row in as.character(file[,ncol])) { - input = c(input, strsplit(row, ";")[[1]][1]) - } - } - - output = args$output - - #function to create a list of infos from file path - extract_info_from_path <- function(path) { - file_name=strsplit(tail(strsplit(path,"/")[[1]],n=1),"\\.")[[1]][1] - date=tail(strsplit(file_name,"_")[[1]],n=1) - tissue=paste(strsplit(file_name,"_")[[1]][1:2],collapse="_") - return (c(date,tissue,file_name,path)) - } - - #data_frame building - paths=strsplit(args$atlas,",")[[1]] - tmp <- sapply(paths, extract_info_from_path,USE.NAMES = FALSE) - df <- as.data.frame(t(as.data.frame(tmp)),row.names = c(""),stringsAsFactors = FALSE) - names(df) <- c("date","tissue","filename","path") - - # Annotations - res = sapply(df$path, function(x) nb_obs_PeptideAtlas(input, x), USE.NAMES = FALSE) - names=df$filename - - # Write output - if (input_type == "list") { - res = cbind(as.matrix(input), res) - names = c("Uniprot accession number", names) - colnames(res) = names - write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE) - } else if (input_type == "file") { - names = c(names(file), names) - output_content = cbind(file, res) - colnames(output_content) = names - write.table(output_content, output, row.names = FALSE, sep = "\t", quote = FALSE) - } -} - -main() -#Rscript retrieve_peptideatlas.R --input_type="file" --input="test-data/FKW_Lacombe_et_al_2017_OK.txt" --atlas_brain="Human_Brain_201803_PeptideAtlas.txt" --column="c1" --header="true" --output="test-data/PeptideAtlas_output.txt" --atlas_urine="Human_Urine_201803_PeptideAtlas.txt" --atlas="brain,urine" - diff -r 23671dd35026 -r 6ab9d2778f04 proteore_ms_observation_pepatlas.xml --- a/proteore_ms_observation_pepatlas.xml Wed Sep 19 05:24:38 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,118 +0,0 @@ - - - – homo sapiens only - - - R - - - - - - $__tool_directory__/proteore_ms_observation_pepatlas.R - --input_type="$input.ids" - #if $input.ids == "list" - --input="$input.list" - #else - --input="$input.file" - --column_number="$input.ncol" - --header="$input.header" - #end if - --atlas="$atlas" - --output="$output" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -