Mercurial > repos > computational-metabolomics > mspurity_createdatabase
view dimsPredictPuritySingle.R @ 10:f5043704dd62 draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit f10386dee95f3b1fbc8d1eeec52d450381ba89c5
author | computational-metabolomics |
---|---|
date | Fri, 13 Sep 2024 12:27:15 +0000 |
parents | efd14b326007 |
children |
line wrap: on
line source
# Command-line script: reads a peak table, resolves the mzML file to use,
# runs msPurity::dimsPredictPuritySingle() on the table's m/z values and
# writes the peak table with the result columns appended to
# <out_dir>/dimsPredictPuritySingle_output.tsv.
library(msPurity)
library(optparse)
print(sessionInfo())

option_list <- list(
  make_option(c("--mzML_file"), type = "character"),
  make_option(c("--mzML_files"), type = "character"),
  make_option(c("--mzML_filename"), type = "character", default = ""),
  make_option(c("--mzML_galaxy_names"), type = "character", default = ""),
  make_option(c("--peaks_file"), type = "character"),
  make_option(c("-o", "--out_dir"), type = "character"),
  make_option("--minoffset", default = 0.5),
  make_option("--maxoffset", default = 0.5),
  make_option("--ilim", default = 0.05),
  make_option("--ppm", default = 4),
  make_option("--dimspy", action = "store_true"),
  make_option("--sim", action = "store_true"),
  make_option("--remove_nas", action = "store_true"),
  make_option("--iwNorm", default = "none", type = "character"),
  make_option("--file_num_dimspy", default = 1),
  make_option("--exclude_isotopes", action = "store_true"),
  make_option("--isotope_matrix", type = "character")
)

# store options
opt <- parse_args(OptionParser(option_list = option_list))

print(opt)
print(opt$mzML_files)
print(opt$mzML_galaxy_names)

# Fail early with a readable message instead of an obscure error from
# read.table()/write.table() further down.
if (is.null(opt$peaks_file)) {
  stop("--peaks_file is required", call. = FALSE)
}
if (is.null(opt$out_dir)) {
  stop("--out_dir is required", call. = FALSE)
}

# Split a comma-separated string into a character vector of trimmed,
# non-empty entries. The raw input is printed for logging.
str_to_vec <- function(x) {
  print(x)
  parts <- trimws(strsplit(x, ",")[[1]])
  parts[parts != ""]
}

# Look up the path in `mzML_files` whose position matches `mzML_filename`
# in `galaxy_names`. Both `mzML_files` and `galaxy_names` are
# comma-separated strings. Stops if the name is not listed.
find_mzml_file <- function(mzML_files, galaxy_names, mzML_filename) {
  mzML_filename <- trimws(mzML_filename)
  mzML_files <- str_to_vec(mzML_files)
  galaxy_names <- str_to_vec(galaxy_names)
  if (!(mzML_filename %in% galaxy_names)) {
    stop(paste("mzML file not found - ", mzML_filename))
  }
  mzML_files[galaxy_names == mzML_filename]
}

# Decide which mzML path to use, in order of preference:
#   1. --mzML_file when it names an existing regular file;
#   2. the entry of --mzML_files matching `filename` in --mzML_galaxy_names;
#   3. --mzML_file treated as a folder containing `filename`.
# `filename` is the name to look up (or append) in cases 2 and 3.
resolve_mzml_file <- function(opt, filename) {
  single <- opt$mzML_file
  has_single <- !is.null(single) && nzchar(single)
  # file.exists() is also TRUE for directories, so exclude them here;
  # otherwise case 3 could never be reached for an existing folder.
  if (has_single && file.exists(single) && !dir.exists(single)) {
    return(single)
  }
  if (!is.null(opt$mzML_files)) {
    return(find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename))
  }
  if (has_single) {
    return(file.path(single, filename))
  }
  stop(
    "No mzML input supplied: provide --mzML_file or --mzML_files",
    call. = FALSE
  )
}

if (!isTRUE(opt$dimspy)) {
  # Plain tab-separated peak table.
  df <- read.table(opt$peaks_file, header = TRUE, sep = "\t")
  if (is.null(df$mz)) {
    stop("Peak table has no 'mz' column", call. = FALSE)
  }
  mzML_file <- resolve_mzml_file(opt, opt$mzML_filename)
} else {
  indf <- read.table(opt$peaks_file,
    header = TRUE, sep = "\t", stringsAsFactors = FALSE
  )

  # Per-file columns are taken from column 8 onwards; --file_num_dimspy
  # selects one of them. Negative indexing keeps this safe when the table
  # has fewer than 8 columns (8:ncol() would count backwards).
  # NOTE(review): presumably columns 1-7 are dimspy peak summary columns --
  # confirm against the dimspy output format.
  sample_cols <- colnames(indf)[-seq_len(7)]
  filename <- sample_cols[opt$file_num_dimspy]
  if (length(filename) != 1 || is.na(filename)) {
    stop(
      paste("No sample column found for --file_num_dimspy",
        opt$file_num_dimspy),
      call. = FALSE
    )
  }
  print(filename)

  # Keep the raw values of the selected column in `i` and convert the
  # selected column itself to numeric.
  indf$i <- indf[[filename]]
  indf[[filename]] <- as.numeric(indf[[filename]])

  # check if the data file is mzML or RAW (can only use mzML currently) so
  # we expect an mzML file of the same name in the same folder.
  # Only a trailing ".raw" extension is rewritten, so a name that merely
  # contains "raw" elsewhere is left intact.
  filename <- sub("\\.raw$", ".mzML", filename, ignore.case = TRUE)
  print(filename)

  mzML_file <- resolve_mzml_file(opt, filename)

  # Update the dimspy output with the correct information: drop the first
  # three rows (safe even when the table has fewer than four rows).
  # NOTE(review): presumably those rows hold per-sample metadata rather
  # than peaks -- confirm against the dimspy output format.
  df <- indf[-seq_len(3), ]
  if ("blank_flag" %in% colnames(df)) {
    # which() drops NA comparisons instead of producing all-NA rows.
    df <- df[which(df$blank_flag == 1), ]
  }
  colnames(df)[colnames(df) == "m.z"] <- "mz"
  if (is.null(df$mz)) {
    stop("Peak table has no 'm.z'/'mz' column", call. = FALSE)
  }
  # %in% never yields NA, so this works when mz already contains NA.
  df$mz[df$mz %in% "nan"] <- NA
  df$mz <- as.numeric(df$mz)
}

if (isTRUE(opt$remove_nas)) {
  df <- df[!is.na(df$mz), ]
}

if (is.null(opt$isotope_matrix)) {
  im <- NULL
} else {
  im <- read.table(opt$isotope_matrix,
    header = TRUE, sep = "\t", stringsAsFactors = FALSE
  )
}

# The store_true flags are only present in `opt` when given on the command
# line.
isotopes <- isTRUE(opt$exclude_isotopes)
sim <- isTRUE(opt$sim)

minOffset <- as.numeric(opt$minoffset)
maxOffset <- as.numeric(opt$maxoffset)

# Select the isolation-window normalisation function.
if (opt$iwNorm == "none") {
  iwNorm <- FALSE
  iwNormFun <- NULL
} else if (opt$iwNorm == "gauss") {
  iwNorm <- TRUE
  iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset)
} else if (opt$iwNorm == "rcosine") {
  iwNorm <- TRUE
  iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset)
} else if (opt$iwNorm == "QE5") {
  iwNorm <- TRUE
  iwNormFun <- msPurity::iwNormQE.5()
} else {
  # Without this branch an unknown value left iwNorm/iwNormFun undefined.
  stop(
    paste0(
      "Unknown --iwNorm value '", opt$iwNorm,
      "' (expected none, gauss, rcosine or QE5)"
    ),
    call. = FALSE
  )
}

print("FIRST ROWS OF PEAK FILE")
print(head(df))
print(mzML_file)

predicted <- msPurity::dimsPredictPuritySingle(df$mz,
  filepth = mzML_file,
  minOffset = minOffset,
  maxOffset = maxOffset,
  ppm = opt$ppm,
  mzML = TRUE,
  sim = sim,
  ilim = opt$ilim,
  isotopes = isotopes,
  im = im,
  iwNorm = iwNorm,
  iwNormFun = iwNormFun
)
predicted <- cbind(df, predicted)

print(head(predicted))

out_file <- file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv")
print(out_file)
write.table(predicted, out_file, row.names = FALSE, sep = "\t")