# HG changeset patch
# User proteore
# Date 1545144650 18000
# Node ID 6ab9d2778f04e3e83d310f33e7016a1861705e6b
# Parent 23671dd35026c5d31cac100df5dc5023e78f33f5
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
diff -r 23671dd35026 -r 6ab9d2778f04 Get_ms-ms_observations.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Get_ms-ms_observations.R Tue Dec 18 09:50:50 2018 -0500
@@ -0,0 +1,106 @@
+# Read file and return file content as data.frame
+read_file <- function(path,header){
+ file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE)
+ if (inherits(file,"try-error")){
+ stop("File not found !")
+ }else{
+ return(file)
+ }
+}
+
+str2bool <- function(x){
+ if (any(is.element(c("t","true"),tolower(x)))){
+ return (TRUE)
+ }else if (any(is.element(c("f","false"),tolower(x)))){
+ return (FALSE)
+ }else{
+ return(NULL)
+ }
+}
+
+nb_obs_PeptideAtlas <- function(input, atlas_file) {
+ ## Calculate the sum of n_observations for each ID in input
+ atlas = read_file(atlas_file, T)
+ return(atlas$nb_obs[match(input,atlas$Uniprot_AC)])
+}
+
+main = function() {
+ args <- commandArgs(TRUE)
+ if(length(args)<1) {
+ args <- c("--help")
+ }
+
+ # Help section
+ if("--help" %in% args) {
+ cat("Selection and Annotation HPA
+ Arguments:
+ --input_type: type of input (list of id or filename)
+ --input: input
+ --atlas: list of file(s) path to use
+ --output: text output filename \n")
+ q(save="no")
+ }
+
+ # Parse arguments
+ parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
+ argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
+ args <- as.list(as.character(argsDF$V2))
+ names(args) <- argsDF$V1
+
+ #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda")
+ #load("/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda")
+
+ # Extract input
+ input_type = args$input_type
+ if (input_type == "list") {
+ input = strsplit(args$input, "[ \t\n]+")[[1]]
+ } else if (input_type == "file") {
+ filename = args$input
+ ncol = args$column
+ # Check ncol
+ if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
+ stop("Please enter an integer for level")
+ } else {
+ ncol = as.numeric(gsub("c", "", ncol))
+ }
+ header = str2bool(args$header)
+ file = read_file(filename, header)
+ input = sapply(file[,ncol],function(x) strsplit(as.character(x),";")[[1]][1],USE.NAMES = F)
+ }
+
+ output = args$output
+
+ #function to create a list of infos from file path
+ extract_info_from_path <- function(path) {
+ file_name=strsplit(tail(strsplit(path,"/")[[1]],n=1),"\\.")[[1]][1]
+ date=tail(strsplit(file_name,"_")[[1]],n=1)
+ tissue=paste(strsplit(file_name,"_")[[1]][1:2],collapse="_")
+ return (c(date,tissue,file_name,path))
+ }
+
+ #data_frame building
+ paths=strsplit(args$atlas,",")[[1]]
+ tmp <- sapply(paths, extract_info_from_path,USE.NAMES = FALSE)
+ df <- as.data.frame(t(as.data.frame(tmp)),row.names = c(""),stringsAsFactors = FALSE)
+ names(df) <- c("date","tissue","filename","path")
+
+ # Annotations
+ res = sapply(df$path, function(x) nb_obs_PeptideAtlas(input, x), USE.NAMES = FALSE)
+
+ colnames(res)=df$filename
+
+ # Write output
+ if (input_type == "list") {
+ res = cbind(as.matrix(input), res)
+ colnames(res)[1] = "Uniprot accession number"
+ } else if (input_type == "file") {
+ res = cbind(file, res)
+ }
+ res = as.data.frame(apply(res, c(1,2), function(x) gsub("^$|^ $", NA, x)))
+ write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE)
+
+}
+
+main()
+#Rscript retrieve_peptideatlas.R --input_type="file" --input="test-data/FKW_Lacombe_et_al_2017_OK.txt" --atlas_brain="Human_Brain_201803_PeptideAtlas.txt" --column="c1" --header="true" --output="test-data/PeptideAtlas_output.txt" --atlas_urine="Human_Urine_201803_PeptideAtlas.txt" --atlas="brain,urine"
+
diff -r 23671dd35026 -r 6ab9d2778f04 Get_ms-ms_observations.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Get_ms-ms_observations.xml Tue Dec 18 09:50:50 2018 -0500
@@ -0,0 +1,128 @@
+
+ [Peptide Atlas]
+
+ R
+
+
+
+
+
+ $__tool_directory__/Get_ms-ms_observations.R
+ --input_type="$input.ids"
+ #if $input.ids == "list"
+ --input="$input.list"
+ #else
+ --input="$input.file"
+ --column_number="$input.ncol"
+ --header="$input.header"
+ #end if
+ --atlas=${",".join([$__tool_data_path__+"/"+str(ref) for ref in str($atlas).split(",")])}
+ --output="$output"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 23671dd35026 -r 6ab9d2778f04 peptide_atlas.loc.sample
--- a/peptide_atlas.loc.sample Wed Sep 19 05:24:38 2018 -0400
+++ b/peptide_atlas.loc.sample Tue Dec 18 09:50:50 2018 -0500
@@ -3,3 +3,10 @@
#This is a tab separated file (TAB, not 4 spaces !)
#
#
+Human_Brain Human Brain 25/07/2018 peptide_atlas/Human_Brain_25-07-2018.tsv
+Human_CSF Human CSF 25/07/2018 peptide_atlas/Human_CSF_25-07-2018.tsv
+Human_Heart Human Heart 25/07/2018 peptide_atlas/Human_Heart_25-07-2018.tsv
+Human_Kidney Human Kidney 25/07/2018 peptide_atlas/Human_Kidney_25-07-2018.tsv
+Human_Liver Human Liver 25/07/2018 peptide_atlas/Human_Liver_25-07-2018.tsv
+Human_Plasma Human Plasma 25/07/2018 peptide_atlas/Human_Plasma_25-07-2018.tsv
+Human_Urine Human Urine 25/07/2018 peptide_atlas/Human_Urine_25-07-2018.tsv
diff -r 23671dd35026 -r 6ab9d2778f04 proteore_ms_observation_pepatlas.R
--- a/proteore_ms_observation_pepatlas.R Wed Sep 19 05:24:38 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,122 +0,0 @@
-# Read file and return file content as data.frame
-readfile <- function(filename, header) {
- if (header == "true") {
- # Read only first line of the file as header:
- headers <- try(read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = ""),silent=TRUE)
- if (!inherits(headers, 'try-error')){
- file
- } else {
- stop("Your file seems to be empty, 'number of MS/MS observations in a tissue' tool stopped !")
- }
- #Read the data of the files (skipping the first row)
- file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
- # Remove empty rows
- file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
- #And assign the header to the data
- names(file) <- headers
- }
- else {
- file <- try(read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = ""),silent=TRUE)
- if (!inherits(file, 'try-error')){
- file
- } else {
- stop("Your file seems to be empty, 'number of MS/MS observations in a tissue' tool stopped !")
- }
- # Remove empty rows
- file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
- }
- return(file)
-}
-
-nb_obs_PeptideAtlas <- function(input, atlas_file) {
- ## Calculate the sum of n_observations for each ID in input
- atlas = readfile(atlas_file, "true")
- return(atlas$nb_obs[match(input,atlas$Uniprot_AC)])
-}
-
-main = function() {
- args <- commandArgs(TRUE)
- if(length(args)<1) {
- args <- c("--help")
- }
-
- # Help section
- if("--help" %in% args) {
- cat("Selection and Annotation HPA
- Arguments:
- --input_type: type of input (list of id or filename)
- --input: input
- --atlas: list of file(s) path to use
- --output: text output filename \n")
- q(save="no")
- }
-
- # Parse arguments
- parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
- argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
- args <- as.list(as.character(argsDF$V2))
- names(args) <- argsDF$V1
-
- #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda")
- #load("/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda")
-
- # Extract input
- input_type = args$input_type
- if (input_type == "list") {
- input = strsplit(args$input, "[ \t\n]+")[[1]]
- } else if (input_type == "file") {
- filename = args$input
- ncol = args$column
- # Check ncol
- if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
- stop("Please enter an integer for level")
- } else {
- ncol = as.numeric(gsub("c", "", ncol))
- }
- header = args$header
- # Get file content
- file = readfile(filename, header)
- # Extract Protein IDs list
- input = c()
- for (row in as.character(file[,ncol])) {
- input = c(input, strsplit(row, ";")[[1]][1])
- }
- }
-
- output = args$output
-
- #function to create a list of infos from file path
- extract_info_from_path <- function(path) {
- file_name=strsplit(tail(strsplit(path,"/")[[1]],n=1),"\\.")[[1]][1]
- date=tail(strsplit(file_name,"_")[[1]],n=1)
- tissue=paste(strsplit(file_name,"_")[[1]][1:2],collapse="_")
- return (c(date,tissue,file_name,path))
- }
-
- #data_frame building
- paths=strsplit(args$atlas,",")[[1]]
- tmp <- sapply(paths, extract_info_from_path,USE.NAMES = FALSE)
- df <- as.data.frame(t(as.data.frame(tmp)),row.names = c(""),stringsAsFactors = FALSE)
- names(df) <- c("date","tissue","filename","path")
-
- # Annotations
- res = sapply(df$path, function(x) nb_obs_PeptideAtlas(input, x), USE.NAMES = FALSE)
- names=df$filename
-
- # Write output
- if (input_type == "list") {
- res = cbind(as.matrix(input), res)
- names = c("Uniprot accession number", names)
- colnames(res) = names
- write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE)
- } else if (input_type == "file") {
- names = c(names(file), names)
- output_content = cbind(file, res)
- colnames(output_content) = names
- write.table(output_content, output, row.names = FALSE, sep = "\t", quote = FALSE)
- }
-}
-
-main()
-#Rscript retrieve_peptideatlas.R --input_type="file" --input="test-data/FKW_Lacombe_et_al_2017_OK.txt" --atlas_brain="Human_Brain_201803_PeptideAtlas.txt" --column="c1" --header="true" --output="test-data/PeptideAtlas_output.txt" --atlas_urine="Human_Urine_201803_PeptideAtlas.txt" --atlas="brain,urine"
-
diff -r 23671dd35026 -r 6ab9d2778f04 proteore_ms_observation_pepatlas.xml
--- a/proteore_ms_observation_pepatlas.xml Wed Sep 19 05:24:38 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,118 +0,0 @@
-
-
- – homo sapiens only
-
-
- R
-
-
-
-
-
- $__tool_directory__/proteore_ms_observation_pepatlas.R
- --input_type="$input.ids"
- #if $input.ids == "list"
- --input="$input.list"
- #else
- --input="$input.file"
- --column_number="$input.ncol"
- --header="$input.header"
- #end if
- --atlas="$atlas"
- --output="$output"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-