changeset 1:df2672c37732 draft

planemo upload commit 42359ca78388ce5221bc88905a78c996c758aa43
author workflow4metabolomics
date Tue, 24 May 2022 18:14:49 +0000
parents 91a3242fd67f
children a35fde23940e
files MS2snoop.R MS2snoop.xml README.md
diffstat 3 files changed, 334 insertions(+), 112 deletions(-) [+]
line wrap: on
line diff
--- a/MS2snoop.R	Mon Apr 25 08:23:54 2022 +0000
+++ b/MS2snoop.R	Tue May 24 18:14:49 2022 +0000
@@ -13,8 +13,22 @@
 #'
 #' @import optparse
 #'
-NULL
+
+
+assign("MS2SNOOP_VERSION", "1.0.1")
+lockBinding("MS2SNOOP_VERSION", globalenv())
+
+assign("MISSING_PARAMETER_ERROR", 1)
+lockBinding("MISSING_PARAMETER_ERROR", globalenv())
 
+assign("BAD_PARAMETER_VALUE_ERROR", 2)
+lockBinding("BAD_PARAMETER_VALUE_ERROR", globalenv())
+
+assign("MISSING_INPUT_FILE_ERROR", 3)
+lockBinding("MISSING_INPUT_FILE_ERROR", globalenv())
+
+assign("NO_ANY_RESULT_ERROR", 255)
+lockBinding("NO_ANY_RESULT_ERROR", globalenv())
 
 assign("DEFAULT_PRECURSOR_PATH", "peaklist_precursors.tsv")
 assign("DEFAULT_FRAGMENTS_PATH", "peaklist_fragments.tsv")
@@ -47,9 +61,6 @@
 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", globalenv())
 
 
-debug <- FALSE
-
-
 ########################################################################
 
 #' @title plot_pseudo_spectra
@@ -213,7 +224,7 @@
     ## files (collision energy)
     ## this lead to a processing for each fileid
     mf <- levels(as.factor(sprecini$fileid))
-    if (length(mf) > 1) {
+    if (length(mf) > 1 && global_verbose) {
       cat(" several files detected for this compounds :\n")
     }
 
@@ -239,7 +250,9 @@
       ## creation of cross table row=scan col=mz X=ra
       vmz <- levels(as.factor(sfrgtfil$mznominal))
 
-      cat(" fragments :", vmz)
+      if (global_verbose) {
+        cat(" fragments :", vmz)
+      }
 
       ## mz of precursor in data precursor to check correlation with
       mz_prec <- paste0("mz", round(mean(sprec$mz), mzdecimal))
@@ -271,7 +284,8 @@
           )
         }
       }
-      if (debug) {
+      if (global_debug) {
+        print(ds_abs_int)
         write.table(
           x = ds_abs_int,
           file = paste0(c_name, "ds_abs_int.txt"),
@@ -358,7 +372,9 @@
       if (!is.null(res_comp_by_file)) {
         res_comp <- rbind(res_comp, res_comp_by_file)
       }
-      cat("\n")
+      if (global_verbose) {
+        cat("\n")
+      }
       dev.off()
     }
   } else {
@@ -368,6 +384,25 @@
   return(res_comp)
 }
 
+set_global <- function(var, value) {
+  assign(var, value, envir = globalenv())
+}
+
+set_debug <- function() {
+  set_global("global_debug", TRUE)
+}
+
+unset_debug <- function() {
+  set_global("global_debug", FALSE)
+}
+
+set_verbose <- function() {
+  set_global("global_verbose", TRUE)
+}
+
+unset_verbose <- function() {
+  set_global("global_verbose", FALSE)
+}
 
 create_parser <- function() {
   parser <- optparse::OptionParser()
@@ -376,7 +411,27 @@
     c("-v", "--verbose"),
     action = "store_true",
     default = FALSE,
-    help = "Print extra output [default %default]"
+    help = paste(
+      "[default %default]",
+      "Print extra output"
+    )
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("-V", "--version"),
+    action = "store_true",
+    default = FALSE,
+    help = "Prints version and exits"
+  )
+  parser <- optparse::add_option(
+    parser,
+    c("-d", "--debug"),
+    action = "store_true",
+    default = FALSE,
+    help = paste(
+      "[default %default]",
+      "Print debug outputs"
+    )
   )
   parser <- optparse::add_option(
     parser,
@@ -416,7 +471,11 @@
     type = "numeric",
     action = "store",
     default = DEFAULT_TOLMZ,
-    metavar = "number"
+    metavar = "number",
+    help = paste(
+      "[default %default]",
+      "Tolerance for MZ (in Dalton) to match the standard in the compounds"
+    )
   )
   parser <- optparse::add_option(
     parser,
@@ -424,16 +483,23 @@
     type = "integer",
     action = "store",
     default = DEFAULT_TOLRT,
-    metavar = "number"
+    metavar = "number",
+    help = paste(
+      "[default %default]",
+      "Tolerance for RT (in seconds) to match the standard in the compounds"
+    )
   )
   parser <- optparse::add_option(
     parser,
     c("--seuil_ra"),
     type = "numeric",
     action = "store",
-    help = "relative intensity threshold",
     default = DEFAULT_SEUIL_RA,
-    metavar = "number"
+    metavar = "number",
+    help = paste(
+      "[default %default]",
+      "relative intensity threshold"
+    ),
   )
   parser <- optparse::add_option(
     parser,
@@ -441,7 +507,10 @@
     type = "integer",
     default = DEFAULT_MZDECIMAL,
     action = "store",
-    help = "nb decimal for mz",
+    help = paste(
+      "[default %default]",
+      "Number of decimal to write for MZ"
+    ),
     metavar = "number"
   )
   parser <- optparse::add_option(
@@ -450,8 +519,9 @@
     type = "integer",
     default = DEFAULT_R_THRESHOLD,
     action = "store",
-    help = paste0(
-      "r pearson correlation threshold between precursor and fragment ",
+    help = paste(
+      "[default %default]",
+      "R-Pearson correlation threshold between precursor and fragment",
       "absolute intensity"
     ),
     metavar = "number"
@@ -462,17 +532,112 @@
     type = "numeric",
     action = "store",
     default = DEFAULT_MINNUMBERSCAN,
-    help = paste0(
-      "fragments are kept if there are found in a minimum number ",
-      "of scans"
+    help = paste(
+      "[default %default]",
+      "Fragments are kept if they are found in a minimum number",
+      "of min_number_scan scans"
     ),
     metavar = "number"
   )
   return(parser)
 }
 
+stop_with_status <- function(msg, status) {
+  message(sprintf("Error: %s", msg))
+  message(sprintf("Error code: %s", status))
+  base::quit(status = status)
+}
+
+check_args_validity <- function(args) { ## nolint cyclocomp_linter
+  sysvars <- Sys.getenv()
+  sysvarnames <- names(sysvars)
+  if (length(args$output) == 0 || nchar(args$output[1]) == 0) {
+    stop_with_status(
+      "Missing output parameters. Please set it with --output.",
+      MISSING_PARAMETER_ERROR
+    )
+  }
+  if (length(args$precursors) == 0 || nchar(args$precursors[1]) == 0) {
+    stop_with_status(
+      "Missing precursors parameters. Please set it with --precursors.",
+      MISSING_PARAMETER_ERROR
+    )
+  }
+  if (length(args$fragments) == 0 || nchar(args$fragments[1]) == 0) {
+    stop_with_status(
+      "Missing fragments parameters. Please set it with --fragments.",
+      MISSING_PARAMETER_ERROR
+    )
+  }
+  if (length(args$compounds) == 0 || nchar(args$compounds[1]) == 0) {
+    stop_with_status(
+      "Missing compounds parameters. Please set it with --compounds.",
+      MISSING_PARAMETER_ERROR
+    )
+  }
+  if (!file.exists(args$precursors)) {
+    stop_with_status(
+      sprintf(
+        "Precursors file %s does not exist or cannot be accessed.",
+        args$precursors
+      ),
+      MISSING_INPUT_FILE_ERROR
+    )
+  }
+  if (!file.exists(args$fragments)) {
+    stop_with_status(
+      sprintf(
+        "Fragments file %s does not exist or cannot be accessed.",
+        args$fragments
+      ),
+      MISSING_INPUT_FILE_ERROR
+    )
+  }
+  if (!file.exists(args$compounds)) {
+    stop_with_status(
+      sprintf(
+        "Compounds file %s does not exist or cannot be accessed.",
+        args$compounds
+      ),
+      MISSING_INPUT_FILE_ERROR
+    )
+  }
+  if (
+    "_GALAXY_JOB_HOME_DIR" %in% sysvarnames
+    || "_GALAXY_JOB_TMP_DIR" %in% sysvarnames
+    || "GALAXY_MEMORY_MB" %in% sysvarnames
+    || "GALAXY_MEMORY_MB_PER_SLOT" %in% sysvarnames
+    || "GALAXY_SLOTS" %in% sysvarnames
+  ) {
+    check_galaxy_args_validity(args)
+  }
+}
+
+check_galaxy_args_validity <- function(args) {
+  if (!file.exists(args$output)) {
+    stop_with_status(
+      sprintf(
+        "Output file %s does not exist or cannot be accessed.",
+        args$output
+      ),
+      MISSING_INPUT_FILE_ERROR
+    )
+  }
+}
+
 main <- function(args) {
-  ## FOLDER AND FILES
+  if (args$version) {
+    cat(sprintf("%s\n", MS2SNOOP_VERSION))
+    base::quit(status = 0)
+  }
+  print(sessionInfo())
+  check_args_validity(args)
+  if (args$debug) {
+    set_debug()
+  }
+  if (args$verbose) {
+    set_verbose()
+  }
   ## MSpurity precursors file
   precursors <- read.table(
     file = args$precursors,
@@ -494,22 +659,8 @@
     quote = "\"",
     header = TRUE
   )
-  ## PARAMETERS
-  ## tolerance for mz(dalton) rt(seconds) to match the standard in the compounds
-  ## list with the precursor MSpurity file
-  tolmz <- args$tolmz
-  tolrt <- args$tolrt
 
-  ##  relative intensity threshold
-  seuil_ra <- args$seuil_ra
-  ## nb decimal for mz
-  mzdecimal <- args$mzdecimal
-  ## r pearson correlation threshold between precursor and
-  # #fragment absolute intensity
-  r_threshold <- args$r_threshold
-  ## fragments are kept if there are found in a minimum number of scans
-  min_number_scan <- args$min_number_scan
-
+  res_all <- NULL
   for (i in seq_len(nrow(compounds))) {
     ## loop execution for all compounds in the compounds file
     res_cor <- NULL
@@ -519,22 +670,24 @@
       mzref = compounds[[2]][i],
       rtref = compounds[[3]][i],
       c_name = compounds[[1]][i],
-      min_number_scan = min_number_scan,
-      mzdecimal = mzdecimal,
-      r_threshold = r_threshold,
-      seuil_ra = seuil_ra,
-      tolmz = tolmz,
-      tolrt = tolrt
+      min_number_scan = args$min_number_scan,
+      mzdecimal = args$mzdecimal,
+      r_threshold = args$r_threshold,
+      seuil_ra = args$seuil_ra,
+      tolmz = args$tolmz,
+      tolrt = args$tolrt
     )
-    if (i == 1 & !is.null(res_cor)) {
-      res_all <- res_cor
-    } else if (!is.null(res_cor)) {
-      res_all <- rbind(res_all, res_cor)
+    if (!is.null(res_cor)) {
+      if (is.null(res_all)) {
+        res_all <- res_cor
+      } else {
+        res_all <- rbind(res_all, res_cor)
+      }
     }
   }
 
   if (is.null(res_all)) {
-    stop("No result at all!")
+    stop_with_status("No result at all!", NO_ANY_RESULT_ERROR)
   }
   write.table(
     x = res_all,
@@ -544,8 +697,9 @@
   )
 }
 
+unset_debug()
+unset_verbose()
 args <- optparse::parse_args(create_parser())
-sessionInfo()
 main(args)
 
 warnings()
--- a/MS2snoop.xml	Mon Apr 25 08:23:54 2022 +0000
+++ b/MS2snoop.xml	Tue May 24 18:14:49 2022 +0000
@@ -1,93 +1,140 @@
-<tool id="ms2snoop" name="MS2 Snoop" version="1.0.0" profile="21.05">
+<tool id="ms2snoop" name="MS2 Snoop" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.01">
+    <macros>
+        <token name="@TOOL_VERSION@">1.0.1</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
     <description>
         MS1/MS2 spectra and associated adducts extraction and cleaning
     </description>
-
+    <edam_topics>
+        <edam_topic>topic_0091</edam_topic>
+        <edam_topic>topic_3370</edam_topic>
+    </edam_topics>
+    <edam_operations>
+        <edam_operation>operation_3803</edam_operation>
+        <edam_operation>operation_3860</edam_operation>
+    </edam_operations>
     <requirements>
-        <requirement type="package">r-optparse</requirement>
+        <requirement type="package" version="4.1.3">r-base</requirement>
+        <requirement type="package" version="1.7.1">r-optparse</requirement>
     </requirements>
-
-    <command><![CDATA[
+    <stdio>
+        <exit_code range="1" level="fatal" description="Missing parameter error" />
+        <exit_code range="2" level="fatal" description="Bad parameter's value" />
+        <exit_code range="3" level="fatal" description="Missing input file" />
+        <exit_code range="4:254" level="fatal" description="Unknown error" />
+        <exit_code range="255" level="fatal" description="No result to output" />
+        <regex match="Error in\s+.*:\s+.*" />
+    </stdio>
+    <version_command>
+        Rscript '$__tool_directory__/MS2snoop.R' --version | head -n 1
+    </version_command>
+    <command>
+        <![CDATA[
 Rscript '$__tool_directory__/MS2snoop.R'
-    -o '$frag_result_txt'
-    -c '$compound_txt'
-    -f '$peaklist_frag_tsv'
-    -p '$peaklist_preco_tsv'
+    --output '$frag_result_txt'
+    --compounds '$compound_txt'
+    --fragments '$peaklist_frag_tsv'
+    --precursors '$peaklist_preco_tsv'
     --tolmz '$tolmz'
     --tolrt '$tolrt'
     --seuil_ra '$seuil_ra'
     --mzdecimal '$mzdecimal'
     --r_threshold '$r_threshold'
     --min_number_scan '$min_number_scan'
-;
-    ]]></command>
-
+    $advenced.debug
+    $advenced.verbose
+        ]]>
+    </command>
     <inputs>
         <param
             type="data"
-            format="tabular"
-            name="compound_txt"
-            label="compounds_pos.txt"
+            format="tabular,csv"
+            argument="--compound_txt"
+            label="list of compounds : col1=Name of molecule, col2=m/z, col3=retention time"
         />
         <param
             type="data"
-            format="tabular"
-            name="peaklist_frag_tsv"
-            label="peaklist_fragments.tsv"
+            format="tabular,csv"
+            argument="--peaklist_frag_tsv"
+            label="MSpurity fragments file"
         />
         <param
             type="data"
-            format="tabular"
-            name="peaklist_preco_tsv"
-            label="peaklist_precursors.tsv"
+            format="tabular,csv"
+            argument="--peaklist_preco_tsv"
+            label="MSpurity precursors file"
         />
         <param
-            name="tolmz"
             type="float"
             min="0.0001"
             max="10"
             value="0.01"
             label="MZ Tolerence"
+            argument="--tolmz"
         />
         <param
-            name="tolrt"
             type="integer"
             min="0"
             max="30"
             value="20"
             label="RT Tolerence"
+            argument="--tolrt"
         />
         <param
-            name="seuil_ra"
             type="float"
             min="0"
             max="1"
             value="0.05"
             label="r pearson correlation threshold between precursor and fragment absolute intensity"
+            argument="--seuil_ra"
         />
         <param
-            name="mzdecimal"
             type="integer"
             min="0"
             max="5"
             value="0"
             label="nb decimal for mz"
+            argument="--mzdecimal"
         />
         <param
-            name="r_threshold"
             type="float"
             min="0"
-            value="0.85" label="fragment absolute intensity"
+            value="0.85"
+            label="fragment absolute intensity"
+            argument="--r_threshold"
         />
         <param
-            name="min_number_scan"
             type="integer"
             min="0"
             max="25"
             value="8"
             label="Present in at least X scan"
+            argument="--min_number_scan"
             help="fragments are kept if they are found in a minimum number of scans"
         />
+        <section title="Advanced Options" name="advenced">
+            <param
+                type="boolean"
+                value=""
+                label="Verbose logs"
+                argument="--verbose"
+                truevalue="--verbose"
+                falsevalue=""
+                help="The tool will print more logs"
+                optional="true"
+            />
+            <param
+                type="boolean"
+                value=""
+                label="Debug statements"
+                argument="--debug"
+                truevalue="--debug"
+                falsevalue=""
+                help="The tool will print debug statements"
+                optional="true"
+            />
+        </section>
     </inputs>
     <outputs>
         <data
@@ -98,20 +145,22 @@
     </outputs>
     <tests>
         <test>
-            <param name="compound_txt" value="compounds_pos.txt"/>
-            <param name="peaklist_frag_tsv" value="peaklist_fragments.tsv"/>
-            <param name="peaklist_preco_tsv" value="peaklist_precursors.tsv"/>
+            <param name="compound_txt" value="compounds_pos.txt" />
+            <param name="peaklist_frag_tsv" value="peaklist_fragments.tsv" />
+            <param name="peaklist_preco_tsv" value="peaklist_precursors.tsv" />
             <output name="frag_result_txt" file="compound_fragments_result.txt" />
         </test>
     </tests>
+    <help><![CDATA[
+.. class:: infomark
 
-    <help><![CDATA[
+**Authors** Jean-Francois Martin (jean-francois.martin@inrae.fr), Lain Pavot (lain.pavot@inrae.fr), Kevin Wagner (kevin.wagner@inrae.fr) 
 
 .. class:: infomark
-**Authors** Jean-Francois Martin (jean-francois.martin@inrae.fr), Lain Pavot (lain.pavot@inrae.fr), Kevin Wagner (kevin.wagner@inrae.fr) 
-.. class:: infomark
+
 ---------------------------------------------------
 
+
 ==============
 MS2 validation  
 ==============
@@ -127,19 +176,28 @@
 Workflow position
 -----------------
 
-**Upstream tools**
-========================= ================= ======= =========
-Name                      output file       format  parameter
-========================= ================= ======= =========
-msPurity.purityA               NA             TSV      NA
-msPurity.frag4feature          NA             TSV      NA
-========================= ================= ======= =========
-**Downstream tools**
-========================= ================= ======= =========
-Name                      output file       format  parameter
-========================= ================= ======= =========
-NA                        NA                NA      NA
-========================= ================= ======= =========
+--------------
+Upstream tools
+--------------
+
++-------------------------+-----------------+--------+------------+
+| Name                    |  output file    | format |  parameter |
++=========================+=================+========+============+
+| msPurity.purityA        |       NA        |   TSV  |    NA      |
++-------------------------+-----------------+--------+------------+
+| msPurity.frag4feature   |       NA        |   TSV  |    NA      |
++-------------------------+-----------------+--------+------------+
+
+----------------
+Downstream tools
+----------------
+
++-------------------------+-----------------+--------+------------+
+| Name                    |  output file    | format |  parameter |
++=========================+=================+========+============+
+| NA                      |       NA        |   NA   |    NA      |
++-------------------------+-----------------+--------+------------+
+
 
 -----------
 Input files
@@ -149,8 +207,10 @@
 | Parameter : num + label                      |   Format   |
 +===========================+==================+============+
 | Input from msPurity Precursor                |   TSV      |
++----------------------------------------------+------------+
 | Input from msPurity fragment                 |   TSV      |
++----------------------------------------------+------------+
 | Input compounds file to search in precursor  |            |
 | and fragment (Name + m/z + ret Time)         |   TSV      |
 +----------------------------------------------+------------+
 
@@ -159,35 +219,43 @@
 Parameters
 ----------
 
-msPurity.purityA output TSV file
-msPurity.frag4feature output TSV file
+msPurity.purityA
+  | output TSV file
+
+msPurity.frag4feature
+  | output TSV file
+
 Compounds file
-| A TSV TXT file with a list of compounds with at least 3 columns : 
-|   col1=Name of molecule, col2=m/z, col3=retention time
+  | A TSV TXT file with a list of compounds with at least 3 columns : 
+  |   col1=Name of molecule, col2=m/z, col3=retention time
+
 tolerance for mz(dalton) rt(seconds) to match the standard in the compounds file with the precursor MSpurity files
-|   tolmz <- 0.01
-|   tolrt <- 20
+  |   tolmz <- 0.01
+  |   tolrt <- 20
+
 relative intensity threshold
-|   seuil_ra = 0.05
+  |   seuil_ra = 0.05
+
 nb decimal for mz
-|   mzdecimal <- 0
+  |   mzdecimal <- 0
+
 r pearson correlation threshold between precursor and fragment absolute intensity
-|   r_threshold <- 0.85
+  |   r_threshold <- 0.85
+
 fragments are kept if there are found in a minimum number of scans
-|   minNumberScan <- 8
+  |   min_number_scan <- 8
 
 ------------
 Output files
 ------------
 
 compound_fragments_result.tsv
-| tabular output
-| Array with p rows (corresponding to the fragments for the different compounds of the compounds file)
-| Last column "corvalid" is a boolean for validated fragments 
+  | tabular output
+  | Array with p rows (corresponding to the fragments for the different compounds of the compounds file)
+  | Last column "corvalid" is a boolean for validated fragments 
+
 processing_file.pdf
-| pdf output 
-| For each compound of the compounds file, Graph of the all fragments with parents (or most intense peak if parent is not detected in the fragments. At the end the pdf file a graph of the spectra with validated fragments.
-
+  | pdf output 
+  | For each compound of the compounds file, a graph of all the fragments with their parent (or the most intense peak if the parent is not detected in the fragments). At the end of the pdf file, a graph of the spectra with validated fragments.
    ]]></help>
-
 </tool>
--- a/README.md	Mon Apr 25 08:23:54 2022 +0000
+++ b/README.md	Tue May 24 18:14:49 2022 +0000
@@ -5,7 +5,7 @@
 -----------
 
  * **@name**: MS2 Snoop
- * **@version**: 1.0.0
+ * **@version**: 1.0.1
  * **@authors**: Jean François Martin (INRAE)
  * **@maintainers**: Lain Pavot (PFEM - INRAE - MetaboHUB)
  * **@init date**: 2022, April