diff flagRemove.R @ 8:cb4aeec93d49 draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 7e1748612a9f9dce11a9e54ff36752b600e7aea3
author computational-metabolomics
date Wed, 12 Jun 2024 16:08:23 +0000
parents d4a17be5429a
children 174a523cb1da
line wrap: on
line diff
--- a/flagRemove.R	Tue Feb 08 14:04:07 2022 +0000
+++ b/flagRemove.R	Wed Jun 12 16:08:23 2024 +0000
@@ -2,77 +2,93 @@
 library(optparse)
 print(sessionInfo())
 option_list <- list(
-  make_option(c("-o", "--out_dir"), type = "character", default = getwd(),
-              help = "Output folder for resulting files [default = %default]"
-  ),
-  make_option(c("-x", "--xset_path"), type = "character", default = file.path(getwd(), "xset.rds"),
-              help = "The path to the xcmsSet object [default = %default]"
-  ),
-  make_option("--polarity", default = NA,
-              help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]"
-  ),
-  make_option("--rsd_i_blank", default = 100,
-              help = "RSD threshold for the blank [default = %default]"
-  ),
-  make_option("--minfrac_blank", default = 0.5,
-              help = "minimum fraction of files for features needed for the blank [default = %default]"
-  ),
-  make_option("--rsd_rt_blank", default = 100,
-              help = "RSD threshold for the RT of the blank [default = %default]"
-  ),
-
-  make_option("--ithres_blank", default = 0,
-              help = "Intensity threshold for the blank [default = %default]"
-  ),
-  make_option("--s2b", default = 10,
-              help = "fold change (sample/blank) needed for sample peak to be allowed. e.g.
+    make_option(c("-o", "--out_dir"),
+        type = "character", default = getwd(),
+        help = "Output folder for resulting files [default = %default]"
+    ),
+    make_option(c("-x", "--xset_path"),
+        type = "character", default = file.path(getwd(), "xset.rds"),
+        help = "The path to the xcmsSet object [default = %default]"
+    ),
+    make_option("--polarity",
+        default = NA,
+        help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]"
+    ),
+    make_option("--rsd_i_blank",
+        default = 100,
+        help = "RSD threshold for the blank [default = %default]"
+    ),
+    make_option("--minfrac_blank",
+        default = 0.5,
+        help = "minimum fraction of files for features needed for the blank [default = %default]"
+    ),
+    make_option("--rsd_rt_blank",
+        default = 100,
+        help = "RSD threshold for the RT of the blank [default = %default]"
+    ),
+    make_option("--ithres_blank",
+        default = 0,
+        help = "Intensity threshold for the blank [default = %default]"
+    ),
+    make_option("--s2b",
+        default = 10,
+        help = "fold change (sample/blank) needed for sample peak to be allowed. e.g.
                     if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10.
                     1000/10 = 100, so sample has fold change higher than the threshold and the peak
                     is not considered a blank [default = %default]"
-  ),
-  make_option("--blank_class", default = "blank", type = "character",
-              help = "A string representing the class that will be used for the blank.[default = %default]"
-  ),
-  make_option("--egauss_thr", default = NA,
-              help = "Threshold for filtering out non gaussian shaped peaks. Note this only works
+    ),
+    make_option("--blank_class",
+        default = "blank", type = "character",
+        help = "A string representing the class that will be used for the blank.[default = %default]"
+    ),
+    make_option("--egauss_thr",
+        default = NA,
+        help = "Threshold for filtering out non gaussian shaped peaks. Note this only works
                             if the 'verbose columns' and 'fit gauss' was used with xcms
                             [default = %default]"
-  ),
-  make_option("--rsd_i_sample", default = 100,
-              help = "RSD threshold for the samples [default = %default]"
-  ),
-  make_option("--minfrac_sample", default = 0.8,
-              help = "minimum fraction of files for features needed for the samples [default = %default]"
-  ),
-  make_option("--rsd_rt_sample", default = 100,
-              help = "RSD threshold for the RT of the samples [default = %default]"
-  ),
-  make_option("--ithres_sample", default = 5000,
-              help = "Intensity threshold for the sample [default = %default]"
-  ),
-  make_option("--grp_rm_ids", default = NA,
-              help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output)
+    ),
+    make_option("--rsd_i_sample",
+        default = 100,
+        help = "RSD threshold for the samples [default = %default]"
+    ),
+    make_option("--minfrac_sample",
+        default = 0.8,
+        help = "minimum fraction of files for features needed for the samples [default = %default]"
+    ),
+    make_option("--rsd_rt_sample",
+        default = 100,
+        help = "RSD threshold for the RT of the samples [default = %default]"
+    ),
+    make_option("--ithres_sample",
+        default = 5000,
+        help = "Intensity threshold for the sample [default = %default]"
+    ),
+    make_option("--grp_rm_ids",
+        default = NA,
+        help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output)
               [default = %default]"
-  ),
-  make_option("--remove_spectra",  action = "store_true",
-              help = "TRUE if flagged spectra is to be removed [default = %default]"
-  ),
-  make_option("--minfrac_xcms", default = 0.5,
-              help = "minfrac for xcms  grouping [default = %default]"
-  ),
-  make_option("--mzwid", default = 0.001,
-              help = "mzwid for xcms  grouping [default = %default]"
-  ),
-  make_option("--bw", default = 5,
-              help = "bw for xcms  grouping [default = %default]"
-  ),
-
-  make_option("--temp_save",  action = "store_true",
-              help = "Assign True if files for each step saved (for testing purposes) [default = %default]"
-  ),
-
-  make_option("--samplelist",  type = "character", help = "Sample list to determine the blank class")
-
+    ),
+    make_option("--remove_spectra",
+        action = "store_true",
+        help = "TRUE if flagged spectra is to be removed [default = %default]"
+    ),
+    make_option("--minfrac_xcms",
+        default = 0.5,
+        help = "minfrac for xcms  grouping [default = %default]"
+    ),
+    make_option("--mzwid",
+        default = 0.001,
+        help = "mzwid for xcms  grouping [default = %default]"
+    ),
+    make_option("--bw",
+        default = 5,
+        help = "bw for xcms  grouping [default = %default]"
+    ),
+    make_option("--temp_save",
+        action = "store_true",
+        help = "Assign True if files for each step saved (for testing purposes) [default = %default]"
+    ),
+    make_option("--samplelist", type = "character", help = "Sample list to determine the blank class")
 )
 
 # nolint start
@@ -88,13 +104,13 @@
 
 if (is.null(opt$temp_save)) {
     temp_save <- FALSE
-}else{
+} else {
     temp_save <- TRUE
 }
 
 if (is.null(opt$remove_spectra)) {
     remove_spectra <- FALSE
-}else{
+} else {
     remove_spectra <- TRUE
 }
 
@@ -103,8 +119,9 @@
 
 getxcmsSetObject <- function(xobject) {
     # XCMS 1.x
-    if (class(xobject) == "xcmsSet")
+    if (class(xobject) == "xcmsSet") {
         return(xobject)
+    }
     # XCMS 3.x
     if (class(xobject) == "XCMSnExp") {
         # Get the legacy xcmsSet object
@@ -116,7 +133,7 @@
 
 
 loadRData <- function(rdata_path, name) {
-#loads an RData file, and returns the named xset object if it is there
+    # loads an RData file, and returns the named xset object if it is there
     load(rdata_path)
     return(get(ls()[ls() %in% name]))
 }
@@ -126,7 +143,7 @@
 print(xset)
 if (is.null(opt$samplelist)) {
     blank_class <- opt$blank_class
-}else{
+} else {
     samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE)
     samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"])
 
@@ -142,25 +159,26 @@
 
 if (is.null(opt$multilist)) {
     ffrm_out <- flag_remove(xset,
-                        pol = opt$polarity,
-                        rsd_i_blank = opt$rsd_i_blank,
-                        minfrac_blank = opt$minfrac_blank,
-                        rsd_rt_blank = opt$rsd_rt_blank,
-                        ithres_blank = opt$ithres_blank,
-                        s2b = opt$s2b,
-                        ref.class = blank_class,
-                        egauss_thr = opt$egauss_thr,
-                        rsd_i_sample = opt$rsd_i_sample,
-                        minfrac_sample = opt$minfrac_sample,
-                        rsd_rt_sample = opt$rsd_rt_sample,
-                        ithres_sample = opt$ithres_sample,
-                        minfrac_xcms = opt$minfrac_xcms,
-                        mzwid = opt$mzwid,
-                        bw = opt$bw,
-                        out_dir = opt$out_dir,
-                        temp_save = temp_save,
-                        remove_spectra = remove_spectra,
-                        grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]])
+        pol = opt$polarity,
+        rsd_i_blank = opt$rsd_i_blank,
+        minfrac_blank = opt$minfrac_blank,
+        rsd_rt_blank = opt$rsd_rt_blank,
+        ithres_blank = opt$ithres_blank,
+        s2b = opt$s2b,
+        ref.class = blank_class,
+        egauss_thr = opt$egauss_thr,
+        rsd_i_sample = opt$rsd_i_sample,
+        minfrac_sample = opt$minfrac_sample,
+        rsd_rt_sample = opt$rsd_rt_sample,
+        ithres_sample = opt$ithres_sample,
+        minfrac_xcms = opt$minfrac_xcms,
+        mzwid = opt$mzwid,
+        bw = opt$bw,
+        out_dir = opt$out_dir,
+        temp_save = temp_save,
+        remove_spectra = remove_spectra,
+        grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]]
+    )
     print("flag remove finished")
     xset <- ffrm_out[[1]]
     grp_peaklist <- ffrm_out[[2]]
@@ -172,26 +190,26 @@
     peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv")
     print(peak_pth)
     write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist),
-                peak_pth, row.names = FALSE, sep = "\t")
+        peak_pth,
+        row.names = FALSE, sep = "\t"
+    )
 
     removed_peaks <- data.frame(removed_peaks)
     write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks),
-        file.path(opt$out_dir, "removed_peaks.tsv"), row.names = FALSE, sep = "\t")
-
-}else{
-
-   # nolint start
-   # TODO
-   #xsets <- split(xset, multilist_df$multlist)
-   #
-   #mult_grps <- unique(multilist_df$multlist)
-   #
-   #for (mgrp in mult_grps){
-   #   xset_i <- xsets[mgrp]
-   #   xcms::group(xset_i,
-   #
-   # }
-   # nolint end
-
-
+        file.path(opt$out_dir, "removed_peaks.tsv"),
+        row.names = FALSE, sep = "\t"
+    )
+} else {
+    # nolint start
+    # TODO
+    # xsets <- split(xset, multilist_df$multlist)
+    #
+    # mult_grps <- unique(multilist_df$multlist)
+    #
+    # for (mgrp in mult_grps){
+    #   xset_i <- xsets[mgrp]
+    #   xcms::group(xset_i,
+    #
+    # }
+    # nolint end
 }