Mercurial > repos > computational-metabolomics > mspurity_createdatabase
diff flagRemove.R @ 6:2f71b3495221 draft
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author | computational-metabolomics |
---|---|
date | Thu, 04 Mar 2021 12:27:21 +0000 |
parents | 600a866a510c |
children | efd14b326007 |
line wrap: on
line diff
--- a/flagRemove.R Fri Nov 13 10:05:38 2020 +0000 +++ b/flagRemove.R Thu Mar 04 12:27:21 2021 +0000 @@ -2,105 +2,100 @@ library(optparse) print(sessionInfo()) option_list <- list( - make_option(c("-o", "--out_dir"), type="character", default=getwd(), - help="Output folder for resulting files [default = %default]" + make_option(c("-o", "--out_dir"), type = "character", default = getwd(), + help = "Output folder for resulting files [default = %default]" ), - make_option(c("-x", "--xset_path"), type="character", default=file.path(getwd(),"xset.rds"), - help="The path to the xcmsSet object [default = %default]" + make_option(c("-x", "--xset_path"), type = "character", default = file.path(getwd(), "xset.rds"), + help = "The path to the xcmsSet object [default = %default]" ), - make_option("--polarity", default=NA, - help="polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" + make_option("--polarity", default = NA, + help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" ), - make_option("--rsd_i_blank", default=100, - help="RSD threshold for the blank [default = %default]" + make_option("--rsd_i_blank", default = 100, + help = "RSD threshold for the blank [default = %default]" ), - make_option("--minfrac_blank", default=0.5, - help="minimum fraction of files for features needed for the blank [default = %default]" + make_option("--minfrac_blank", default = 0.5, + help = "minimum fraction of files for features needed for the blank [default = %default]" ), - make_option("--rsd_rt_blank", default=100, - help="RSD threshold for the RT of the blank [default = %default]" + make_option("--rsd_rt_blank", default = 100, + help = "RSD threshold for the RT of the blank [default = %default]" ), - make_option("--ithres_blank", default=0, - help="Intensity threshold for the blank [default = %default]" + make_option("--ithres_blank", default = 0, + help = "Intensity threshold for the blank [default = %default]" ), - make_option("--s2b", default=10, - help="fold change (sample/blank) needed for sample peak to be allowed. e.g. + make_option("--s2b", default = 10, + help = "fold change (sample/blank) needed for sample peak to be allowed. e.g. if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10. 1000/10 = 100, so sample has fold change higher than the threshold and the peak is not considered a blank [default = %default]" ), - make_option("--blank_class", default='blank', type="character", - help="A string representing the class that will be used for the blank.[default = %default]" + make_option("--blank_class", default = "blank", type = "character", + help = "A string representing the class that will be used for the blank.[default = %default]" ), - make_option("--egauss_thr", default=NA, - help="Threshold for filtering out non gaussian shaped peaks. Note this only works + make_option("--egauss_thr", default = NA, + help = "Threshold for filtering out non gaussian shaped peaks. Note this only works if the 'verbose columns' and 'fit gauss' was used with xcms [default = %default]" ), - make_option("--rsd_i_sample", default=100, - help="RSD threshold for the samples [default = %default]" + make_option("--rsd_i_sample", default = 100, + help = "RSD threshold for the samples [default = %default]" ), - make_option("--minfrac_sample", default=0.8, - help="minimum fraction of files for features needed for the samples [default = %default]" + make_option("--minfrac_sample", default = 0.8, + help = "minimum fraction of files for features needed for the samples [default = %default]" ), - make_option("--rsd_rt_sample", default=100, - help="RSD threshold for the RT of the samples [default = %default]" + make_option("--rsd_rt_sample", default = 100, + help = "RSD threshold for the RT of the samples [default = %default]" ), - make_option("--ithres_sample", default=5000, - help="Intensity threshold for the sample [default = %default]" + make_option("--ithres_sample", default = 5000, + help = "Intensity threshold for the sample [default = %default]" ), - make_option("--grp_rm_ids", default=NA, - help="vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) + make_option("--grp_rm_ids", default = NA, + help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) [default = %default]" ), - make_option("--remove_spectra", action="store_true", - help=" TRUE if flagged spectra is to be removed [default = %default]" + make_option("--remove_spectra", action = "store_true", + help = "TRUE if flagged spectra is to be removed [default = %default]" ), - make_option("--minfrac_xcms", default=0.5, - help="minfrac for xcms grouping [default = %default]" + make_option("--minfrac_xcms", default = 0.5, + help = "minfrac for xcms grouping [default = %default]" ), - make_option("--mzwid", default=0.001, - help="mzwid for xcms grouping [default = %default]" + make_option("--mzwid", default = 0.001, + help = "mzwid for xcms grouping [default = %default]" ), - make_option("--bw", default=5, - help="bw for xcms grouping [default = %default]" + make_option("--bw", default = 5, + help = "bw for xcms grouping [default = %default]" ), - make_option("--temp_save", action="store_true", - help="Assign True if files for each step saved (for testing purposes) [default = %default]" + make_option("--temp_save", action = "store_true", + help = "Assign True if files for each step saved (for testing purposes) [default = %default]" ), - make_option("--samplelist", type="character", help="Sample list to determine the blank class") - - - - + make_option("--samplelist", type = "character", help = "Sample list to determine the blank class") ) - #make_option("--multilist", action="store_true" - # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" - #), +# nolint start +# make_option("--multilist", action="store_true" +# help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" +# ), +# nolint end # store options -opt<- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) opt <- replace(opt, opt == "NA", NA) - - - -if (is.null(opt$temp_save)){ - temp_save<-FALSE +if (is.null(opt$temp_save)) { + temp_save <- FALSE }else{ - temp_save<-TRUE + temp_save <- TRUE } -if (is.null(opt$remove_spectra)){ - remove_spectra<-FALSE +if (is.null(opt$remove_spectra)) { + remove_spectra <- FALSE }else{ - remove_spectra<-TRUE + remove_spectra <- TRUE } @@ -109,35 +104,35 @@ getxcmsSetObject <- function(xobject) { # XCMS 1.x if (class(xobject) == "xcmsSet") - return (xobject) + return(xobject) # XCMS 3.x if (class(xobject) == "XCMSnExp") { # Get the legacy xcmsSet object - suppressWarnings(xset <- as(xobject, 'xcmsSet')) - sampclass(xset) <- xset@phenoData$sample_group - return (xset) + suppressWarnings(xset <- as(xobject, "xcmsSet")) + xcms::sampclass(xset) <- xset@phenoData$sample_group + return(xset) } } -loadRData <- function(rdata_path, name){ +loadRData <- function(rdata_path, name) { #loads an RData file, and returns the named xset object if it is there load(rdata_path) return(get(ls()[ls() %in% name])) } -xset <- getxcmsSetObject(loadRData(opt$xset_path, c('xset','xdata'))) +xset <- getxcmsSetObject(loadRData(opt$xset_path, c("xset", "xdata"))) print(xset) -if (is.null(opt$samplelist)){ +if (is.null(opt$samplelist)) { blank_class <- opt$blank_class }else{ - samplelist <- read.table(opt$samplelist, sep='\t', header=TRUE) - samplelist_blank <- unique(samplelist$sample_class[samplelist$blank=='yes']) + samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE) + samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"]) chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class] - if (length(chosen_blank)>1){ - print('ERROR: only 1 blank is currently allowed to be used with this tool') + if (length(chosen_blank) > 1) { + print("ERROR: only 1 blank is currently allowed to be used with this tool") quit() } blank_class <- as.character(chosen_blank) @@ -145,47 +140,47 @@ } -if (is.null(opt$multilist)){ +if (is.null(opt$multilist)) { ffrm_out <- flag_remove(xset, - pol=opt$polarity, - rsd_i_blank=opt$rsd_i_blank, - minfrac_blank=opt$minfrac_blank, - rsd_rt_blank=opt$rsd_rt_blank, - ithres_blank=opt$ithres_blank, - s2b=opt$s2b, - ref.class=blank_class, - egauss_thr=opt$egauss_thr, - rsd_i_sample=opt$rsd_i_sample, - minfrac_sample=opt$minfrac_sample, - rsd_rt_sample=opt$rsd_rt_sample, - ithres_sample=opt$ithres_sample, - minfrac_xcms=opt$minfrac_xcms, - mzwid=opt$mzwid, - bw=opt$bw, - out_dir=opt$out_dir, - temp_save=temp_save, - remove_spectra=remove_spectra, - grp_rm_ids=unlist(strsplit(as.character(opt$grp_rm_ids), split=", "))[[1]]) - print('flag remove finished') + pol = opt$polarity, + rsd_i_blank = opt$rsd_i_blank, + minfrac_blank = opt$minfrac_blank, + rsd_rt_blank = opt$rsd_rt_blank, + ithres_blank = opt$ithres_blank, + s2b = opt$s2b, + ref.class = blank_class, + egauss_thr = opt$egauss_thr, + rsd_i_sample = opt$rsd_i_sample, + minfrac_sample = opt$minfrac_sample, + rsd_rt_sample = opt$rsd_rt_sample, + ithres_sample = opt$ithres_sample, + minfrac_xcms = opt$minfrac_xcms, + mzwid = opt$mzwid, + bw = opt$bw, + out_dir = opt$out_dir, + temp_save = temp_save, + remove_spectra = remove_spectra, + grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]]) + print("flag remove finished") xset <- ffrm_out[[1]] grp_peaklist <- ffrm_out[[2]] removed_peaks <- ffrm_out[[3]] - save.image(file=file.path(opt$out_dir, 'xset_filtered.RData'), version=2) + save.image(file = file.path(opt$out_dir, "xset_filtered.RData"), version = 2) # grpid needed for mspurity ID needed for deconrank... (will clean up at some up) - peak_pth <- file.path(opt$out_dir, 'peaklist_filtered.tsv') + peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv") print(peak_pth) - write.table(data.frame('grpid'=rownames(grp_peaklist), 'ID'=rownames(grp_peaklist), grp_peaklist), - peak_pth, row.names=FALSE, sep='\t') + write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist), + peak_pth, row.names = FALSE, sep = "\t") removed_peaks <- data.frame(removed_peaks) - write.table(data.frame('ID'=rownames(removed_peaks),removed_peaks), - file.path(opt$out_dir, 'removed_peaks.tsv'), row.names=FALSE, sep='\t') + write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks), + file.path(opt$out_dir, "removed_peaks.tsv"), row.names = FALSE, sep = "\t") }else{ - + # nolint start # TODO #xsets <- split(xset, multilist_df$multlist) # @@ -193,12 +188,10 @@ # #for (mgrp in mult_grps){ # xset_i <- xsets[mgrp] - # xcms::group(xset_i, + # xcms::group(xset_i, # # } - + # nolint end } - -