Mercurial > repos > workflow4metabolomics > ms2snoop
changeset 1:df2672c37732 draft
planemo upload commit 42359ca78388ce5221bc88905a78c996c758aa43
author | workflow4metabolomics |
---|---|
date | Tue, 24 May 2022 18:14:49 +0000 |
parents | 91a3242fd67f |
children | a35fde23940e |
files | MS2snoop.R MS2snoop.xml README.md |
diffstat | 3 files changed, 334 insertions(+), 112 deletions(-) [+] |
line wrap: on
line diff
--- a/MS2snoop.R Mon Apr 25 08:23:54 2022 +0000 +++ b/MS2snoop.R Tue May 24 18:14:49 2022 +0000 @@ -13,8 +13,22 @@ #' #' @import optparse #' -NULL + + +assign("MS2SNOOP_VERSION", "1.0.1") +lockBinding("MS2SNOOP_VERSION", globalenv()) + +assign("MISSING_PARAMETER_ERROR", 1) +lockBinding("MISSING_PARAMETER_ERROR", globalenv()) +assign("BAD_PARAMETER_VALUE_ERROR", 2) +lockBinding("BAD_PARAMETER_VALUE_ERROR", globalenv()) + +assign("MISSING_INPUT_FILE_ERROR", 3) +lockBinding("MISSING_INPUT_FILE_ERROR", globalenv()) + +assign("NO_ANY_RESULT_ERROR", 255) +lockBinding("NO_ANY_RESULT_ERROR", globalenv()) assign("DEFAULT_PRECURSOR_PATH", "peaklist_precursors.tsv") assign("DEFAULT_FRAGMENTS_PATH", "peaklist_fragments.tsv") @@ -47,9 +61,6 @@ lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", globalenv()) -debug <- FALSE - - ######################################################################## #' @title plot_pseudo_spectra @@ -213,7 +224,7 @@ ## files (collision energy) ## this lead to a processing for each fileid mf <- levels(as.factor(sprecini$fileid)) - if (length(mf) > 1) { + if (length(mf) > 1 && global_verbose) { cat(" several files detected for this compounds :\n") } @@ -239,7 +250,9 @@ ## creation of cross table row=scan col=mz X=ra vmz <- levels(as.factor(sfrgtfil$mznominal)) - cat(" fragments :", vmz) + if (global_verbose) { + cat(" fragments :", vmz) + } ## mz of precursor in data precursor to check correlation with mz_prec <- paste0("mz", round(mean(sprec$mz), mzdecimal)) @@ -271,7 +284,8 @@ ) } } - if (debug) { + if (global_debug) { + print(ds_abs_int) write.table( x = ds_abs_int, file = paste0(c_name, "ds_abs_int.txt"), @@ -358,7 +372,9 @@ if (!is.null(res_comp_by_file)) { res_comp <- rbind(res_comp, res_comp_by_file) } - cat("\n") + if (global_verbose) { + cat("\n") + } dev.off() } } else { @@ -368,6 +384,25 @@ return(res_comp) } +set_global <- function(var, value) { + assign(var, value, envir = globalenv()) +} + +set_debug <- function() { + set_global("global_debug", TRUE) +} + +unset_debug <- function() { + set_global("global_debug", FALSE) +} + +set_verbose <- function() { + set_global("global_verbose", TRUE) +} + +unset_verbose <- function() { + set_global("global_verbose", FALSE) +} create_parser <- function() { parser <- optparse::OptionParser() @@ -376,7 +411,27 @@ c("-v", "--verbose"), action = "store_true", default = FALSE, - help = "Print extra output [default %default]" + help = paste( + "[default %default]", + "Print extra output" + ) + ) + parser <- optparse::add_option( + parser, + c("-V", "--version"), + action = "store_true", + default = FALSE, + help = "Prints version and exits" + ) + parser <- optparse::add_option( + parser, + c("-d", "--debug"), + action = "store_true", + default = FALSE, + help = paste( + "[default %default]", + "Print debug outputs" + ) ) parser <- optparse::add_option( parser, @@ -416,7 +471,11 @@ type = "numeric", action = "store", default = DEFAULT_TOLMZ, - metavar = "number" + metavar = "number", + help = paste( + "[default %default]", + "Tolerance for MZ (in Dalton) to match the standard in the compounds" + ) ) parser <- optparse::add_option( parser, @@ -424,16 +483,23 @@ type = "integer", action = "store", default = DEFAULT_TOLRT, - metavar = "number" + metavar = "number", + help = paste( + "[default %default]", + "RT (in seconds) to match the standard in the compounds" + ) ) parser <- optparse::add_option( parser, c("--seuil_ra"), type = "numeric", action = "store", - help = "relative intensity threshold", default = DEFAULT_SEUIL_RA, - metavar = "number" + metavar = "number", + help = paste( + "[default %default]", + "relative intensity threshold" + ), ) parser <- optparse::add_option( parser, @@ -441,7 +507,10 @@ type = "integer", default = DEFAULT_MZDECIMAL, action = "store", - help = "nb decimal for mz", + help = paste( + "[default %default]", + "Number of decimal to write for MZ" + ), metavar = "number" ) parser <- optparse::add_option( @@ -450,8 +519,9 @@ type = "integer", default = DEFAULT_R_THRESHOLD, action = "store", - help = paste0( - "r pearson correlation threshold between precursor and fragment ", + help = paste( + "[default %default]", + "R-Pearson correlation threshold between precursor and fragment", "absolute intensity" ), metavar = "number" @@ -462,17 +532,112 @@ type = "numeric", action = "store", default = DEFAULT_MINNUMBERSCAN, - help = paste0( - "fragments are kept if there are found in a minimum number ", - "of scans" + help = paste( + "[default %default]", + "Fragments are kept if there are found in a minimum number", + "of min_number_scan scans" ), metavar = "number" ) return(parser) } +stop_with_status <- function(msg, status) { + message(sprintf("Error: %s", msg)) + message(sprintf("Error code: %s", status)) + base::quit(status = status) +} + +check_args_validity <- function(args) { ## nolint cyclocomp_linter + sysvars <- Sys.getenv() + sysvarnames <- names(sysvars) + if (length(args$output) == 0 || nchar(args$output[1]) == 0) { + stop_with_status( + "Missing output parameters. Please set it with --output.", + MISSING_PARAMETER_ERROR + ) + } + if (length(args$precursors) == 0 || nchar(args$precursors[1]) == 0) { + stop_with_status( + "Missing precursors parameters. Please set it with --precursors.", + MISSING_PARAMETER_ERROR + ) + } + if (length(args$fragments) == 0 || nchar(args$fragments[1]) == 0) { + stop_with_status( + "Missing fragments parameters. Please set it with --fragments.", + MISSING_PARAMETER_ERROR + ) + } + if (length(args$compounds) == 0 || nchar(args$compounds[1]) == 0) { + stop_with_status( + "Missing compounds parameters. Please set it with --compounds.", + MISSING_PARAMETER_ERROR + ) + } + if (!file.exists(args$precursors)) { + stop_with_status( + sprintf( + "Precursors file %s does not exist or cannot be accessed.", + args$precursors + ), + MISSING_INPUT_FILE_ERROR + ) + } + if (!file.exists(args$fragments)) { + stop_with_status( + sprintf( + "Fragments file %s does not exist or cannot be accessed.", + args$fragments + ), + MISSING_INPUT_FILE_ERROR + ) + } + if (!file.exists(args$compounds)) { + stop_with_status( + sprintf( + "Compounds file %s does not exist or cannot be accessed.", + args$compounds + ), + MISSING_INPUT_FILE_ERROR + ) + } + if ( + "_GALAXY_JOB_HOME_DIR" %in% sysvarnames + || "_GALAXY_JOB_TMP_DIR" %in% sysvarnames + || "GALAXY_MEMORY_MB" %in% sysvarnames + || "GALAXY_MEMORY_MB_PER_SLOT" %in% sysvarnames + || "GALAXY_SLOTS" %in% sysvarnames + ) { + check_galaxy_args_validity(args) + } +} + +check_galaxy_args_validity <- function(args) { + if (!file.exists(args$output)) { + stop_with_status( + sprintf( + "Output file %s does not exist or cannot be accessed.", + args$output + ), + MISSING_INPUT_FILE_ERROR + ) + } +} + main <- function(args) { - ## FOLDER AND FILES + if (args$version) { + cat(sprintf("%s\n", MS2SNOOP_VERSION)) + base::quit(status = 0) + } + sessionInfo() + check_args_validity(args) + if (args$debug) { + set_debug() + } + if (args$verbose) { + set_verbose() + } ## MSpurity precursors file precursors <- read.table( file = args$precursors, @@ -494,22 +659,8 @@ quote = "\"", header = TRUE ) - ## PARAMETERS - ## tolerance for mz(dalton) rt(seconds) to match the standard in the compounds - ## list with the precursor MSpurity file - tolmz <- args$tolmz - tolrt <- args$tolrt - ## relative intensity threshold - seuil_ra <- args$seuil_ra - ## nb decimal for mz - mzdecimal <- args$mzdecimal - ## r pearson correlation threshold between precursor and - # #fragment absolute intensity - r_threshold <- args$r_threshold - ## fragments are kept if there are found in a minimum number of scans - min_number_scan <- args$min_number_scan - + res_all <- NULL for (i in seq_len(nrow(compounds))) { ## loop execution for all compounds in the compounds file res_cor <- NULL @@ -519,22 +670,24 @@ mzref = compounds[[2]][i], rtref = compounds[[3]][i], c_name = compounds[[1]][i], - min_number_scan = min_number_scan, - mzdecimal = mzdecimal, - r_threshold = r_threshold, - seuil_ra = seuil_ra, - tolmz = tolmz, - tolrt = tolrt + min_number_scan = args$min_number_scan, + mzdecimal = args$mzdecimal, + r_threshold = args$r_threshold, + seuil_ra = args$seuil_ra, + tolmz = args$tolmz, + tolrt = args$tolrt ) - if (i == 1 & !is.null(res_cor)) { - res_all <- res_cor - } else if (!is.null(res_cor)) { - res_all <- rbind(res_all, res_cor) + if (!is.null(res_cor)) { + if (is.null(res_all)) { + res_all <- res_cor + } else { + res_all <- rbind(res_all, res_cor) + } } } if (is.null(res_all)) { - stop("No result at all!") + stop_with_status("No result at all!", NO_ANY_RESULT_ERROR) } write.table( x = res_all, @@ -544,8 +697,9 @@ ) } +unset_debug() +unset_verbose() args <- optparse::parse_args(create_parser()) -sessionInfo() main(args) warnings()
--- a/MS2snoop.xml Mon Apr 25 08:23:54 2022 +0000 +++ b/MS2snoop.xml Tue May 24 18:14:49 2022 +0000 @@ -1,93 +1,140 @@ -<tool id="ms2snoop" name="MS2 Snoop" version="1.0.0" profile="21.05"> +<tool id="ms2snoop" name="MS2 Snoop" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.01"> + <macros> + <token name="@TOOL_VERSION@">1.0.1</token> + <token name="@VERSION_SUFFIX@">0</token> + </macros> <description> MS1/MS2 spectra and associated adducts extraction and cleaning </description> - + <edam_topics> + <edam_topic>topic_0091</edam_topic> + <edam_topic>topic_3370</edam_topic> + </edam_topics> + <edam_operations> + <edam_operation>operation_3803</edam_operation> + <edam_operation>operation_3860</edam_operation> + </edam_operations> <requirements> - <requirement type="package">r-optparse</requirement> + <requirement type="package" version="4.1.3">r-base</requirement> + <requirement type="package" version="1.7.1">r-optparse</requirement> </requirements> - - <command><![CDATA[ + <stdio> + <exit_code range="1" level="fatal" description="Missing parameter error" /> + <exit_code range="2" level="fatal" description="Bad parameter's value" /> + <exit_code range="3" level="fatal" description="Missing input file" /> + <exit_code range="4:254" level="fatal" description="Unknown error" /> + <exit_code range="255" level="fatal" description="No any result to output" /> + <regex match="Error in\s+.*:\s+.*" /> + </stdio> + <version_command> + Rscript '$__tool_directory__/MS2snoop.R' --version | head -n 1 + </version_command> + <command> + <![CDATA[ Rscript '$__tool_directory__/MS2snoop.R' - -o '$frag_result_txt' - -c '$compound_txt' - -f '$peaklist_frag_tsv' - -p '$peaklist_preco_tsv' + --output '$frag_result_txt' + --compounds '$compound_txt' + --fragments '$peaklist_frag_tsv' + --precursors '$peaklist_preco_tsv' --tolmz '$tolmz' --tolrt '$tolrt' --seuil_ra '$seuil_ra' --mzdecimal '$mzdecimal' --r_threshold '$r_threshold' --min_number_scan '$min_number_scan' -; - ]]></command> - + $advenced.debug + $advenced.verbose + ]]> + </command> <inputs> <param type="data" - format="tabular" - name="compound_txt" - label="compounds_pos.txt" + format="tabular,csv" + argument="--compound_txt" + label="list of compounds : col1=Name of molecule, col2=m/z, col3=retention time" /> <param type="data" - format="tabular" - name="peaklist_frag_tsv" - label="peaklist_fragments.tsv" + format="tabular,csv" + argument="--peaklist_frag_tsv" + label="MSpurity fragments file" /> <param type="data" - format="tabular" - name="peaklist_preco_tsv" - label="peaklist_precursors.tsv" + format="tabular,csv" + argument="--peaklist_preco_tsv" + label="MSpurity precursors file" /> <param - name="tolmz" type="float" min="0.0001" max="10" value="0.01" label="MZ Tolerence" + argument="--tolmz" /> <param - name="tolrt" type="integer" min="0" max="30" value="20" label="RT Tolerence" + argument="--tolrt" /> <param - name="seuil_ra" type="float" min="0" max="1" value="0.05" label="r pearson correlation threshold between precursor and fragment absolute intensity" + argument="--seuil_ra" /> <param - name="mzdecimal" type="integer" min="0" max="5" value="0" label="nb decimal for mz" + argument="--mzdecimal" /> <param - name="r_threshold" type="float" min="0" - value="0.85" label="fragment absolute intensity" + value="0.85" + label="fragment absolute intensity" + argument="--r_threshold" /> <param - name="min_number_scan" type="integer" min="0" max="25" value="8" label="Present in at least X scan" + argument="--min_number_scan" help="fragments are kept if they are found in a minimum number of scans" /> + <section title="Advenced Options" name="advenced"> + <param + type="boolean" + value="" + label="Verbose logs" + argument="--verbose" + truevalue="--verbose" + falsevalue="" + help="The tool will print more logs" + optional="true" + /> + <param + type="boolean" + value="" + label="Debug statements" + argument="--debug" + truevalue="--debug" + falsevalue="" + help="The tool will print debug statements" + optional="true" + /> + </section> </inputs> <outputs> <data @@ -98,20 +145,22 @@ </outputs> <tests> <test> - <param name="compound_txt" value="compounds_pos.txt"/> - <param name="peaklist_frag_tsv" value="peaklist_fragments.tsv"/> - <param name="peaklist_preco_tsv" value="peaklist_precursors.tsv"/> + <param name="compound_txt" value="compounds_pos.txt" /> + <param name="peaklist_frag_tsv" value="peaklist_fragments.tsv" /> + <param name="peaklist_preco_tsv" value="peaklist_precursors.tsv" /> <output name="frag_result_txt" file="compound_fragments_result.txt" /> </test> </tests> + <help><![CDATA[ +.. class:: infomark - <help><![CDATA[ +**Authors** Jean-Francois Martin (jean-francois.martin@inrae.fr), Lain Pavot (lain.pavot@inrae.fr), Kevin Wagner (kevin.wagner@inrae.fr) .. class:: infomark -**Authors** Jean-Francois Martin (jean-francois.martin@inrae.fr), Lain Pavot (lain.pavot@inrae.fr), Kevin Wagner (kevin.wagner@inrae.fr) -.. class:: infomark + --------------------------------------------------- + ============== MS2 validation ============== @@ -127,19 +176,28 @@ Workflow position ----------------- -**Upstream tools** -========================= ================= ======= ========= -Name output file format parameter -========================= ================= ======= ========= -msPurity.purityA NA TSV NA -msPurity.frag4feature NA TSV NA -========================= ================= ======= ========= -**Downstream tools** -========================= ================= ======= ========= -Name output file format parameter -========================= ================= ======= ========= -NA NA NA NA -========================= ================= ======= ========= +-------------- +Upstream tools +-------------- + ++-------------------------+-----------------+--------+------------+ +| Name | output file | format | parameter | ++=========================+=================+========+============+ +| msPurity.purityA | NA | TSV | NA | ++-------------------------+-----------------+--------+------------+ +| msPurity.frag4feature | NA | TSV | NA | ++-------------------------+-----------------+--------+------------+ + +---------------- +Downstream tools +---------------- + ++-------------------------+-----------------+--------+------------+ +| Name | output file | format | parameter | ++=========================+=================+========+============+ +| NA | NA | NA | NA | ++-------------------------+-----------------+--------+------------+ + ----------- Input files @@ -149,8 +207,10 @@ | Parameter : num + label | Format | +===========================+==================+============+ | Input from msPurity Precursor | TSV | ++----------------------------------------------+------------+ | Input from msPurity fragment | TSV | | Input compounds file to search in precursor | | ++----------------------------------------------+------------+ | and fragment (Name + m/z + ret Time) | TSV | +----------------------------------------------+------------+ @@ -159,35 +219,43 @@ Parameters ---------- -msPurity.purityA output TSV file -msPurity.frag4feature output TSV file +msPurity.purityA + | output TSV file + +msPurity.frag4feature + | output TSV file + Compounds file -| A TSV TXT file with a list of compounds with at least 3 columns : -| col1=Name of molecule, col2=m/z, col3=retention time + | A TSV TXT file with a list of compounds with at least 3 columns : + | col1=Name of molecule, col2=m/z, col3=retention time + tolerance for mz(dalton) rt(seconds) to match the standard in the compounds file with the precursor MSpurity files -| tolmz <- 0.01 -| tolrt <- 20 + | tolmz <- 0.01 + | tolrt <- 20 + relative intensity threshold -| seuil_ra = 0.05 + | seuil_ra = 0.05 + nb decimal for mz -| mzdecimal <- 0 + | mzdecimal <- 0 + r pearson correlation threshold between precursor and fragment absolute intensity -| r_threshold <- 0.85 + | r_threshold <- 0.85 + fragments are kept if there are found in a minimum number of scans -| minNumberScan <- 8 + | minNumberScan <- 8 ------------ Output files ------------ compound_fragments_result.tsv -| tabular output -| Array with p rows (corresponding to the fragments for the different compounds of the compounds file) -| Last column "corvalid" is a boolean for validated fragments + | tabular output + | Array with p rows (corresponding to the fragments for the different compounds of the compounds file) + | Last column "corvalid" is a boolean for validated fragments + processing_file.pdf -| pdf output -| For each compound of the compounds file, Graph of the all fragments with parents (or most intense peak if parent is not detected in the fragments. At the end the pdf file a graph of the spectra with validated fragments. - + | pdf output + | For each compound of the compounds file, Graph of the all fragments with parents (or most intense peak if parent is not detected in the fragments. At the end the pdf file a graph of the spectra with validated fragments. ]]></help> - </tool>
--- a/README.md Mon Apr 25 08:23:54 2022 +0000 +++ b/README.md Tue May 24 18:14:49 2022 +0000 @@ -5,7 +5,7 @@ ----------- * **@name**: MS2 Snoop - * **@version**: 1.0.0 + * **@version**: 1.0.1 * **@authors**: Jean François Martin (INRAE) * **@maintainers**: Lain Pavot (PFEM - INRAE - MetaboHUB) * **@init date**: 2022, April