Mercurial > repos > computational-metabolomics > mspurity_createmsp
comparison flagRemove.R @ 8:b91b9492a4bf draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 7e1748612a9f9dce11a9e54ff36752b600e7aea3
| author | computational-metabolomics | 
|---|---|
| date | Wed, 12 Jun 2024 16:04:05 +0000 | 
| parents | d25273689e04 | 
| children | 3d92b95cf6c0 | 
   comparison
  equal
  deleted
  inserted
  replaced
| 7:8512923bdd37 | 8:b91b9492a4bf | 
|---|---|
| 1 library(msPurity) | 1 library(msPurity) | 
| 2 library(optparse) | 2 library(optparse) | 
| 3 print(sessionInfo()) | 3 print(sessionInfo()) | 
| 4 option_list <- list( | 4 option_list <- list( | 
| 5 make_option(c("-o", "--out_dir"), type = "character", default = getwd(), | 5 make_option(c("-o", "--out_dir"), | 
| 6 help = "Output folder for resulting files [default = %default]" | 6 type = "character", default = getwd(), | 
| 7 ), | 7 help = "Output folder for resulting files [default = %default]" | 
| 8 make_option(c("-x", "--xset_path"), type = "character", default = file.path(getwd(), "xset.rds"), | 8 ), | 
| 9 help = "The path to the xcmsSet object [default = %default]" | 9 make_option(c("-x", "--xset_path"), | 
| 10 ), | 10 type = "character", default = file.path(getwd(), "xset.rds"), | 
| 11 make_option("--polarity", default = NA, | 11 help = "The path to the xcmsSet object [default = %default]" | 
| 12 help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" | 12 ), | 
| 13 ), | 13 make_option("--polarity", | 
| 14 make_option("--rsd_i_blank", default = 100, | 14 default = NA, | 
| 15 help = "RSD threshold for the blank [default = %default]" | 15 help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" | 
| 16 ), | 16 ), | 
| 17 make_option("--minfrac_blank", default = 0.5, | 17 make_option("--rsd_i_blank", | 
| 18 help = "minimum fraction of files for features needed for the blank [default = %default]" | 18 default = 100, | 
| 19 ), | 19 help = "RSD threshold for the blank [default = %default]" | 
| 20 make_option("--rsd_rt_blank", default = 100, | 20 ), | 
| 21 help = "RSD threshold for the RT of the blank [default = %default]" | 21 make_option("--minfrac_blank", | 
| 22 ), | 22 default = 0.5, | 
| 23 | 23 help = "minimum fraction of files for features needed for the blank [default = %default]" | 
| 24 make_option("--ithres_blank", default = 0, | 24 ), | 
| 25 help = "Intensity threshold for the blank [default = %default]" | 25 make_option("--rsd_rt_blank", | 
| 26 ), | 26 default = 100, | 
| 27 make_option("--s2b", default = 10, | 27 help = "RSD threshold for the RT of the blank [default = %default]" | 
| 28 help = "fold change (sample/blank) needed for sample peak to be allowed. e.g. | 28 ), | 
| 29 make_option("--ithres_blank", | |
| 30 default = 0, | |
| 31 help = "Intensity threshold for the blank [default = %default]" | |
| 32 ), | |
| 33 make_option("--s2b", | |
| 34 default = 10, | |
| 35 help = "fold change (sample/blank) needed for sample peak to be allowed. e.g. | |
| 29 if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10. | 36 if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10. | 
| 30 1000/10 = 100, so sample has fold change higher than the threshold and the peak | 37 1000/10 = 100, so sample has fold change higher than the threshold and the peak | 
| 31 is not considered a blank [default = %default]" | 38 is not considered a blank [default = %default]" | 
| 32 ), | 39 ), | 
| 33 make_option("--blank_class", default = "blank", type = "character", | 40 make_option("--blank_class", | 
| 34 help = "A string representing the class that will be used for the blank.[default = %default]" | 41 default = "blank", type = "character", | 
| 35 ), | 42 help = "A string representing the class that will be used for the blank.[default = %default]" | 
| 36 make_option("--egauss_thr", default = NA, | 43 ), | 
| 37 help = "Threshold for filtering out non gaussian shaped peaks. Note this only works | 44 make_option("--egauss_thr", | 
| 45 default = NA, | |
| 46 help = "Threshold for filtering out non gaussian shaped peaks. Note this only works | |
| 38 if the 'verbose columns' and 'fit gauss' was used with xcms | 47 if the 'verbose columns' and 'fit gauss' was used with xcms | 
| 39 [default = %default]" | 48 [default = %default]" | 
| 40 ), | 49 ), | 
| 41 make_option("--rsd_i_sample", default = 100, | 50 make_option("--rsd_i_sample", | 
| 42 help = "RSD threshold for the samples [default = %default]" | 51 default = 100, | 
| 43 ), | 52 help = "RSD threshold for the samples [default = %default]" | 
| 44 make_option("--minfrac_sample", default = 0.8, | 53 ), | 
| 45 help = "minimum fraction of files for features needed for the samples [default = %default]" | 54 make_option("--minfrac_sample", | 
| 46 ), | 55 default = 0.8, | 
| 47 make_option("--rsd_rt_sample", default = 100, | 56 help = "minimum fraction of files for features needed for the samples [default = %default]" | 
| 48 help = "RSD threshold for the RT of the samples [default = %default]" | 57 ), | 
| 49 ), | 58 make_option("--rsd_rt_sample", | 
| 50 make_option("--ithres_sample", default = 5000, | 59 default = 100, | 
| 51 help = "Intensity threshold for the sample [default = %default]" | 60 help = "RSD threshold for the RT of the samples [default = %default]" | 
| 52 ), | 61 ), | 
| 53 make_option("--grp_rm_ids", default = NA, | 62 make_option("--ithres_sample", | 
| 54 help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) | 63 default = 5000, | 
| 64 help = "Intensity threshold for the sample [default = %default]" | |
| 65 ), | |
| 66 make_option("--grp_rm_ids", | |
| 67 default = NA, | |
| 68 help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) | |
| 55 [default = %default]" | 69 [default = %default]" | 
| 56 ), | 70 ), | 
| 57 make_option("--remove_spectra", action = "store_true", | 71 make_option("--remove_spectra", | 
| 58 help = "TRUE if flagged spectra is to be removed [default = %default]" | 72 action = "store_true", | 
| 59 ), | 73 help = "TRUE if flagged spectra is to be removed [default = %default]" | 
| 60 make_option("--minfrac_xcms", default = 0.5, | 74 ), | 
| 61 help = "minfrac for xcms grouping [default = %default]" | 75 make_option("--minfrac_xcms", | 
| 62 ), | 76 default = 0.5, | 
| 63 make_option("--mzwid", default = 0.001, | 77 help = "minfrac for xcms grouping [default = %default]" | 
| 64 help = "mzwid for xcms grouping [default = %default]" | 78 ), | 
| 65 ), | 79 make_option("--mzwid", | 
| 66 make_option("--bw", default = 5, | 80 default = 0.001, | 
| 67 help = "bw for xcms grouping [default = %default]" | 81 help = "mzwid for xcms grouping [default = %default]" | 
| 68 ), | 82 ), | 
| 69 | 83 make_option("--bw", | 
| 70 make_option("--temp_save", action = "store_true", | 84 default = 5, | 
| 71 help = "Assign True if files for each step saved (for testing purposes) [default = %default]" | 85 help = "bw for xcms grouping [default = %default]" | 
| 72 ), | 86 ), | 
| 73 | 87 make_option("--temp_save", | 
| 74 make_option("--samplelist", type = "character", help = "Sample list to determine the blank class") | 88 action = "store_true", | 
| 75 | 89 help = "Assign True if files for each step saved (for testing purposes) [default = %default]" | 
| 90 ), | |
| 91 make_option("--samplelist", type = "character", help = "Sample list to determine the blank class") | |
| 76 ) | 92 ) | 
| 77 | 93 | 
| 78 # nolint start | 94 # nolint start | 
| 79 # make_option("--multilist", action="store_true" | 95 # make_option("--multilist", action="store_true" | 
| 80 # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" | 96 # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" | 
| 86 | 102 | 
| 87 opt <- replace(opt, opt == "NA", NA) | 103 opt <- replace(opt, opt == "NA", NA) | 
| 88 | 104 | 
| 89 if (is.null(opt$temp_save)) { | 105 if (is.null(opt$temp_save)) { | 
| 90 temp_save <- FALSE | 106 temp_save <- FALSE | 
| 91 }else{ | 107 } else { | 
| 92 temp_save <- TRUE | 108 temp_save <- TRUE | 
| 93 } | 109 } | 
| 94 | 110 | 
| 95 if (is.null(opt$remove_spectra)) { | 111 if (is.null(opt$remove_spectra)) { | 
| 96 remove_spectra <- FALSE | 112 remove_spectra <- FALSE | 
| 97 }else{ | 113 } else { | 
| 98 remove_spectra <- TRUE | 114 remove_spectra <- TRUE | 
| 99 } | 115 } | 
| 100 | 116 | 
| 101 | 117 | 
| 102 print(opt) | 118 print(opt) | 
| 103 | 119 | 
| 104 getxcmsSetObject <- function(xobject) { | 120 getxcmsSetObject <- function(xobject) { | 
| 105 # XCMS 1.x | 121 # XCMS 1.x | 
| 106 if (class(xobject) == "xcmsSet") | 122 if (class(xobject) == "xcmsSet") { | 
| 107 return(xobject) | 123 return(xobject) | 
| 124 } | |
| 108 # XCMS 3.x | 125 # XCMS 3.x | 
| 109 if (class(xobject) == "XCMSnExp") { | 126 if (class(xobject) == "XCMSnExp") { | 
| 110 # Get the legacy xcmsSet object | 127 # Get the legacy xcmsSet object | 
| 111 suppressWarnings(xset <- as(xobject, "xcmsSet")) | 128 suppressWarnings(xset <- as(xobject, "xcmsSet")) | 
| 112 xcms::sampclass(xset) <- xset@phenoData$sample_group | 129 xcms::sampclass(xset) <- xset@phenoData$sample_group | 
| 114 } | 131 } | 
| 115 } | 132 } | 
| 116 | 133 | 
| 117 | 134 | 
| 118 loadRData <- function(rdata_path, name) { | 135 loadRData <- function(rdata_path, name) { | 
| 119 #loads an RData file, and returns the named xset object if it is there | 136 # loads an RData file, and returns the named xset object if it is there | 
| 120 load(rdata_path) | 137 load(rdata_path) | 
| 121 return(get(ls()[ls() %in% name])) | 138 return(get(ls()[ls() %in% name])) | 
| 122 } | 139 } | 
| 123 | 140 | 
| 124 xset <- getxcmsSetObject(loadRData(opt$xset_path, c("xset", "xdata"))) | 141 xset <- getxcmsSetObject(loadRData(opt$xset_path, c("xset", "xdata"))) | 
| 125 | 142 | 
| 126 print(xset) | 143 print(xset) | 
| 127 if (is.null(opt$samplelist)) { | 144 if (is.null(opt$samplelist)) { | 
| 128 blank_class <- opt$blank_class | 145 blank_class <- opt$blank_class | 
| 129 }else{ | 146 } else { | 
| 130 samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE) | 147 samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE) | 
| 131 samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"]) | 148 samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"]) | 
| 132 | 149 | 
| 133 chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class] | 150 chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class] | 
| 134 if (length(chosen_blank) > 1) { | 151 if (length(chosen_blank) > 1) { | 
| 140 } | 157 } | 
| 141 | 158 | 
| 142 | 159 | 
| 143 if (is.null(opt$multilist)) { | 160 if (is.null(opt$multilist)) { | 
| 144 ffrm_out <- flag_remove(xset, | 161 ffrm_out <- flag_remove(xset, | 
| 145 pol = opt$polarity, | 162 pol = opt$polarity, | 
| 146 rsd_i_blank = opt$rsd_i_blank, | 163 rsd_i_blank = opt$rsd_i_blank, | 
| 147 minfrac_blank = opt$minfrac_blank, | 164 minfrac_blank = opt$minfrac_blank, | 
| 148 rsd_rt_blank = opt$rsd_rt_blank, | 165 rsd_rt_blank = opt$rsd_rt_blank, | 
| 149 ithres_blank = opt$ithres_blank, | 166 ithres_blank = opt$ithres_blank, | 
| 150 s2b = opt$s2b, | 167 s2b = opt$s2b, | 
| 151 ref.class = blank_class, | 168 ref.class = blank_class, | 
| 152 egauss_thr = opt$egauss_thr, | 169 egauss_thr = opt$egauss_thr, | 
| 153 rsd_i_sample = opt$rsd_i_sample, | 170 rsd_i_sample = opt$rsd_i_sample, | 
| 154 minfrac_sample = opt$minfrac_sample, | 171 minfrac_sample = opt$minfrac_sample, | 
| 155 rsd_rt_sample = opt$rsd_rt_sample, | 172 rsd_rt_sample = opt$rsd_rt_sample, | 
| 156 ithres_sample = opt$ithres_sample, | 173 ithres_sample = opt$ithres_sample, | 
| 157 minfrac_xcms = opt$minfrac_xcms, | 174 minfrac_xcms = opt$minfrac_xcms, | 
| 158 mzwid = opt$mzwid, | 175 mzwid = opt$mzwid, | 
| 159 bw = opt$bw, | 176 bw = opt$bw, | 
| 160 out_dir = opt$out_dir, | 177 out_dir = opt$out_dir, | 
| 161 temp_save = temp_save, | 178 temp_save = temp_save, | 
| 162 remove_spectra = remove_spectra, | 179 remove_spectra = remove_spectra, | 
| 163 grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]]) | 180 grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]] | 
| 181 ) | |
| 164 print("flag remove finished") | 182 print("flag remove finished") | 
| 165 xset <- ffrm_out[[1]] | 183 xset <- ffrm_out[[1]] | 
| 166 grp_peaklist <- ffrm_out[[2]] | 184 grp_peaklist <- ffrm_out[[2]] | 
| 167 removed_peaks <- ffrm_out[[3]] | 185 removed_peaks <- ffrm_out[[3]] | 
| 168 | 186 | 
| 170 | 188 | 
| 171 # grpid needed for mspurity ID needed for deconrank... (will clean up at some up) | 189 # grpid needed for mspurity ID needed for deconrank... (will clean up at some up) | 
| 172 peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv") | 190 peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv") | 
| 173 print(peak_pth) | 191 print(peak_pth) | 
| 174 write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist), | 192 write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist), | 
| 175 peak_pth, row.names = FALSE, sep = "\t") | 193 peak_pth, | 
| 194 row.names = FALSE, sep = "\t" | |
| 195 ) | |
| 176 | 196 | 
| 177 removed_peaks <- data.frame(removed_peaks) | 197 removed_peaks <- data.frame(removed_peaks) | 
| 178 write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks), | 198 write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks), | 
| 179 file.path(opt$out_dir, "removed_peaks.tsv"), row.names = FALSE, sep = "\t") | 199 file.path(opt$out_dir, "removed_peaks.tsv"), | 
| 180 | 200 row.names = FALSE, sep = "\t" | 
| 181 }else{ | 201 ) | 
| 182 | 202 } else { | 
| 183 # nolint start | 203 # nolint start | 
| 184 # TODO | 204 # TODO | 
| 185 #xsets <- split(xset, multilist_df$multlist) | 205 # xsets <- split(xset, multilist_df$multlist) | 
| 186 # | 206 # | 
| 187 #mult_grps <- unique(multilist_df$multlist) | 207 # mult_grps <- unique(multilist_df$multlist) | 
| 188 # | 208 # | 
| 189 #for (mgrp in mult_grps){ | 209 # for (mgrp in mult_grps){ | 
| 190 # xset_i <- xsets[mgrp] | 210 # xset_i <- xsets[mgrp] | 
| 191 # xcms::group(xset_i, | 211 # xcms::group(xset_i, | 
| 192 # | 212 # | 
| 193 # } | 213 # } | 
| 194 # nolint end | 214 # nolint end | 
| 195 | 215 } | 
| 196 | |
| 197 } | 
