ms2snoop: MS2snoop.R comparison

comparison MS2snoop.R @ 0:91a3242fd67f draft

"planemo upload commit c7676a9c7ac542043691d735285ae19e430bf032"

author	workflow4metabolomics
date	Mon, 25 Apr 2022 08:23:54 +0000
parents
children	df2672c37732

comparison

equal deleted inserted replaced

--1:000000000000
+:91a3242fd67f
+#'
+#' read and process mspurity W4M files
+#' create a summary of fragment for each precursor and a graphics of peseudo
+#' spectra + correlation on which checking of fragment is based on
+#' V3 try to identify and process multiple files for 1 precursor which may
+#' occur if different collision energy are used
+#' V4 elimination of correlation = NA. Correlation is done with precursor, if
+#' precursor is not present correlation with most intense peak
+#' author: Jean-Francois Martin
+#' V5 is versionned, lintR-compliant, packaged, unit-tested, documented and
+#' tested against data from other labs.
+#' new maintainer: Lain Pavot - lain.pavot@inrae.fr
+#'
+#' @import optparse
+#'
+NULL
+assign("DEFAULT_PRECURSOR_PATH", "peaklist_precursors.tsv")
+assign("DEFAULT_FRAGMENTS_PATH", "peaklist_fragments.tsv")
+assign("DEFAULT_COMPOUNDS_PATH", "compounds_pos.txt")
+assign("DEFAULT_OUTPUT_PATH", "compound_fragments_result.txt")
+assign("DEFAULT_TOLMZ", 0.01)
+assign("DEFAULT_TOLRT", 20)
+assign("DEFAULT_MZDECIMAL", 0)
+assign("DEFAULT_R_THRESHOLD", 0.85)
+assign("DEFAULT_MINNUMBERSCAN", 8)
+assign("DEFAULT_SEUIL_RA", 0.5)
+lockBinding("DEFAULT_PRECURSOR_PATH", globalenv())
+lockBinding("DEFAULT_FRAGMENTS_PATH", globalenv())
+lockBinding("DEFAULT_COMPOUNDS_PATH", globalenv())
+lockBinding("DEFAULT_OUTPUT_PATH", globalenv())
+lockBinding("DEFAULT_TOLMZ", globalenv())
+lockBinding("DEFAULT_TOLRT", globalenv())
+lockBinding("DEFAULT_MZDECIMAL", globalenv())
+lockBinding("DEFAULT_R_THRESHOLD", globalenv())
+lockBinding("DEFAULT_MINNUMBERSCAN", globalenv())
+lockBinding("DEFAULT_SEUIL_RA", globalenv())
+assign("DEFAULT_EXTRACT_FRAGMENTS_R_THRESHOLD", 0.85)
+assign("DEFAULT_EXTRACT_FRAGMENTS_SEUIL_RA", 0.1)
+assign("DEFAULT_EXTRACT_FRAGMENTS_TOLMZ", 0.01)
+assign("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", 60)
+lockBinding("DEFAULT_EXTRACT_FRAGMENTS_R_THRESHOLD", globalenv())
+lockBinding("DEFAULT_EXTRACT_FRAGMENTS_SEUIL_RA", globalenv())
+lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLMZ", globalenv())
+lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", globalenv())
+debug <- FALSE
+########################################################################
+#' @title plot_pseudo_spectra
+#' @param x
+#' @param r_threshold
+#' @param fid
+#' @param sum_int
+#' @param vmz
+#' @param cor_abs_int
+#' @param refcol
+#' @param c_name
+#' @description plot_pseudo_spectra
+#' function to compute sum of intensities among scans for all
+#' m/z kept (cor > r_threshold & minimum number of scans)
+#' and plot pseudo spectra
+#' x dataframe scan X fragments with scans number in the 1st column and
+#' ions in next with intensities
+#' fid file id when several a precursor has been detected in several files
+plot_pseudo_spectra <- function(
+x,
+r_threshold,
+fid,
+sum_int,
+vmz,
+cor_abs_int,
+refcol,
+c_name
+) {
+## du fait de la difference de nombre de colonne entre la dataframe qui
+## inclue les scans en 1ere col, mzRef se decale de 1
+refcol <- refcol - 1
+## compute relative intensities max=100%
+rel_int <- sum_int[-1]
+rel_int <- rel_int / max(rel_int)
+## define max value on vertical axis (need to increase in order to plot the
+## label of fragments)
+ymax <- max(rel_int) + 0.2 * max(rel_int)
+par(mfrow = c(2, 1))
+plot(vmz, rel_int, type = "h", ylim = c(0, ymax), main = c_name)
+## low correl coef. will be display in grey
+cor_low <- which(round(cor_abs_int, 2) < r_threshold)
+lbmzcor <- sprintf("%s(r=%s)", vmz, round(cor_abs_int, 2))
+if (length(cor_low) > 0) {
+text(
+vmz[cor_low],
+rel_int[cor_low],
+lbmzcor[cor_low],
+cex = 0.5,
+col = "grey",
+srt = 90,
+adj = 0
+)
+if (length(vmz) - length(cor_low) > 1) {
+text(
+vmz[-c(refcol, cor_low)],
+rel_int[-c(refcol, cor_low)],
+lbmzcor[-c(refcol, cor_low)],
+cex = 0.6,
+col = 1,
+srt = 90,
+adj = 0
+)
+}
+} else {
+if (length(vmz) > 1) {
+text(
+vmz[-c(refcol)],
+rel_int[-c(refcol)],
+lbmzcor[-c(refcol)],
+cex = 0.6,
+col = 1,
+srt = 90,
+adj = 0
+)
+}
+}
+text(
+vmz[refcol],
+rel_int[refcol],
+lbmzcor[refcol],
+cex = 0.8,
+col = 2,
+srt = 90,
+adj = 0
+)
+## prepare result file
+corValid <- (round(cor_abs_int, 2) >= r_threshold) ##nolint object_name_linter
+cp_res <- data.frame(
+rep(c_name, length(vmz)),
+rep(fid, length(vmz)),
+vmz,
+cor_abs_int,
+sum_int[-1],
+rel_int,
+corValid
+)
+colnames(cp_res) <- c(
+"compoundName",
+"fileid",
+"fragments_mz",
+"CorWithPrecursor",
+"AbsoluteIntensity",
+"relativeIntensity",
+"corValid"
+)
+return(cp_res)
+}
+#'
+#' @title extract_fragments
+#'
+#' @param precursors the precursor list from mspurity
+#' @param fragments the fragments list from ms purity
+#' @param mzref
+#' @param rtref
+#' @param c_name
+#' @param r_threshold default = DEFAULT_EXTRACT_FRAGMENTS_R_THRESHOLD
+#' @param seuil_ra default = DEFAULT_EXTRACT_FRAGMENTS_SEUIL_RA
+#' @param tolmz default = DEFAULT_EXTRACT_FRAGMENTS_TOLMZ
+#' @param tolrt default = DEFAULT_EXTRACT_FRAGMENTS_TOLRT
+#' @returns
+#'
+#' @description
+#' function for extraction of fragments corresponding to precursors
+#' detected by MSPurity
+extract_fragments <- function( ## nolint cyclocomp_linter
+precursors,
+fragments,
+mzref,
+rtref,
+c_name,
+min_number_scan,
+mzdecimal,
+r_threshold=DEFAULT_EXTRACT_FRAGMENTS_R_THRESHOLD,
+seuil_ra=DEFAULT_EXTRACT_FRAGMENTS_SEUIL_RA,
+tolmz=DEFAULT_EXTRACT_FRAGMENTS_TOLMZ,
+tolrt=DEFAULT_EXTRACT_FRAGMENTS_TOLRT
+) {
+## filter precursor in the precursors file based on mz and rt in the
+## compound list
+cat("processing ", c_name, "\n")
+selected_precursors <- which(
+(abs(precursors$precurMtchMZ - mzref) <= tolmz)
+& (abs(precursors$precurMtchRT - rtref) <= tolrt)
+)
+## check if there is the precursor in the file
+if (length(selected_precursors) > 0) {
+sprecini <- precursors[selected_precursors, ]
+## check if fragments corresponding to precursor are found in several
+## files (collision energy)
+## this lead to a processing for each fileid
+mf <- levels(as.factor(sprecini$fileid))
+if (length(mf) > 1) {
+cat(" several files detected for this compounds :\n")
+}
+for (f in seq_along(mf)) {
+sprec <- sprecini[sprecini$fileid == mf[f], ]
+## selection of fragment in the fragments file with the grpid common in
+## both fragments and precursors
+selfrgt <- levels(as.factor(sprec$grpid))
+sfrgt <- fragments[
+fragments$grpid %in% selfrgt
+& fragments$fileid == mf[f],
+]
+## filter fragments on relative intensity seuil_ra = user defined
+## parameter (MSpurity flags could be used here)
+sfrgtfil <- sfrgt[sfrgt$ra > seuil_ra, ]
+mznominal <- round(x = sfrgtfil$mz, mzdecimal)
+sfrgtfil <- data.frame(sfrgtfil, mznominal)
+## creation of cross table row=scan col=mz X=ra
+vmz <- levels(as.factor(sfrgtfil$mznominal))
+cat(" fragments :", vmz)
+## mz of precursor in data precursor to check correlation with
+mz_prec <- paste0("mz", round(mean(sprec$mz), mzdecimal))
+for (m in seq_along(vmz)) {
+## absolute intensity
+cln <- c(
+which(colnames(sfrgtfil) == "acquisitionNum"),
+which(colnames(sfrgtfil) == "i")
+)
+int_mz <- sfrgtfil[sfrgtfil$mznominal == vmz[m], cln]
+colnames(int_mz)[2] <- paste0("mz", vmz[m])
+## average intensities of mass in duplicate scans
+comp_scans <- aggregate(x = int_mz, by = list(int_mz[[1]]), FUN = mean)
+int_mz <- comp_scans[, -1]
+if (m == 1) {
+ds_abs_int <- int_mz
+} else {
+ds_abs_int <- merge(
+x = ds_abs_int,
+y = int_mz,
+by.x = 1,
+by.y = 1,
+all.x = TRUE,
+all.y = TRUE
+)
+}
+}
+if (debug) {
+write.table(
+x = ds_abs_int,
+file = paste0(c_name, "ds_abs_int.txt"),
+row.names = FALSE,
+sep = "\t"
+)
+}
+## elimination of mz with less than min_number_scan scans (user defined
+## parameter)
+xmz <- rep(NA, ncol(ds_abs_int) - 1)
+sum_int <- rep(NA, ncol(ds_abs_int))
+nbxmz <- 0
+nb_scan_check <- min(nrow(ds_abs_int), min_number_scan)
+for (j in 2:ncol(ds_abs_int)) {
+sum_int[j] <- sum(ds_abs_int[j], na.rm = TRUE)
+if (sum(!is.na(ds_abs_int[[j]])) < nb_scan_check) {
+nbxmz <- nbxmz + 1
+xmz[nbxmz] <- j
+}
+}
+xmz <- xmz[-which(is.na(xmz))]
+if (length(xmz) > 0) {
+ds_abs_int <- ds_abs_int[, -c(xmz)]
+sum_int <- sum_int[-c(xmz)]
+## liste des mz keeped decale de 1 avec ds_abs_int
+vmz <- as.numeric(vmz[-c(xmz - 1)])
+}
+## reference ion for correlation computing = precursor OR maximum
+## intensity ion in precursor is not present
+refcol <- which(colnames(ds_abs_int) == mz_prec)
+if (length(refcol) == 0) {
+refcol <- which(sum_int == max(sum_int, na.rm = TRUE))
+}
+pdf(
+file = sprintf("%s_processing_file%s.pdf", c_name, mf[f]),
+width = 8,
+height = 11
+)
+par(mfrow = c(3, 2))
+## Pearson correlations between absolute intensities computing
+cor_abs_int <- rep(NA, length(vmz))
+if (length(refcol) > 0) {
+for (i in 2:length(ds_abs_int)) {
+cor_abs_int[i - 1] <- cor(
+x = ds_abs_int[[refcol]],
+y = ds_abs_int[[i]],
+use = "pairwise.complete.obs",
+method = "pearson"
+)
+plot(
+ds_abs_int[[refcol]],
+ds_abs_int[[i]],
+xlab = colnames(ds_abs_int)[refcol],
+ylab = colnames(ds_abs_int)[i],
+main = sprintf(
+"%s corr coeff r=%s", c_name, round(cor_abs_int[i - 1], 2)
+)
+)
+}
+## plot pseudo spectra
+res_comp_by_file <- plot_pseudo_spectra(
+x = ds_abs_int,
+r_threshold = r_threshold,
+fid = mf[f],
+sum_int = sum_int,
+vmz = vmz,
+cor_abs_int = cor_abs_int,
+refcol = refcol,
+c_name = c_name
+)
+if (f == 1) {
+res_comp <- res_comp_by_file
+}
+} else {
+res_comp_by_file <- NULL
+cat(" non detected in fragments file \n")
+}
+if (!is.null(res_comp_by_file)) {
+res_comp <- rbind(res_comp, res_comp_by_file)
+}
+cat("\n")
+dev.off()
+}
+} else {
+res_comp <- NULL
+cat(" non detected in precursor file \n")
+}
+return(res_comp)
+}
+create_parser <- function() {
+parser <- optparse::OptionParser()
+parser <- optparse::add_option(
+parser,
+c("-v", "--verbose"),
+action = "store_true",
+default = FALSE,
+help = "Print extra output [default %default]"
+)
+parser <- optparse::add_option(
+parser,
+c("-o", "--output"),
+type = "character",
+default = DEFAULT_OUTPUT_PATH,
+action = "store",
+help = "Path to the output file [default %default]"
+)
+parser <- optparse::add_option(
+parser,
+c("-p", "--precursors"),
+type = "character",
+default = DEFAULT_PRECURSOR_PATH,
+action = "store",
+help = "Path to the precursors file [default %default]"
+)
+parser <- optparse::add_option(
+parser,
+c("-f", "--fragments"),
+type = "character",
+default = DEFAULT_FRAGMENTS_PATH,
+action = "store",
+help = "Path to the fragments file [default %default]"
+)
+parser <- optparse::add_option(
+parser,
+c("-c", "--compounds"),
+type = "character",
+default = DEFAULT_COMPOUNDS_PATH,
+action = "store",
+help = "Path to the compounds file [default %default]"
+)
+parser <- optparse::add_option(
+parser,
+c("--tolmz"),
+type = "numeric",
+action = "store",
+default = DEFAULT_TOLMZ,
+metavar = "number"
+)
+parser <- optparse::add_option(
+parser,
+c("--tolrt"),
+type = "integer",
+action = "store",
+default = DEFAULT_TOLRT,
+metavar = "number"
+)
+parser <- optparse::add_option(
+parser,
+c("--seuil_ra"),
+type = "numeric",
+action = "store",
+help = "relative intensity threshold",
+default = DEFAULT_SEUIL_RA,
+metavar = "number"
+)
+parser <- optparse::add_option(
+parser,
+c("--mzdecimal"),
+type = "integer",
+default = DEFAULT_MZDECIMAL,
+action = "store",
+help = "nb decimal for mz",
+metavar = "number"
+)
+parser <- optparse::add_option(
+parser,
+c("--r_threshold"),
+type = "integer",
+default = DEFAULT_R_THRESHOLD,
+action = "store",
+help = paste0(
+"r pearson correlation threshold between precursor and fragment ",
+"absolute intensity"
+),
+metavar = "number"
+)
+parser <- optparse::add_option(
+parser,
+c("--min_number_scan"),
+type = "numeric",
+action = "store",
+default = DEFAULT_MINNUMBERSCAN,
+help = paste0(
+"fragments are kept if there are found in a minimum number ",
+"of scans"
+),
+metavar = "number"
+)
+return(parser)
+}
+main <- function(args) {
+## FOLDER AND FILES
+## MSpurity precursors file
+precursors <- read.table(
+file = args$precursors,
+header = TRUE,
+sep = "\t",
+quote = "\""
+)
+## MSpurity fragments file
+fragments <- read.table(
+file = args$fragments,
+header = TRUE,
+sep = "\t",
+quote = "\""
+)
+## list of compounds : col1=Name of molecule, col2=m/z, col3=retention time
+compounds <- read.table(
+file = args$compounds,
+sep = "\t",
+quote = "\"",
+header = TRUE
+)
+## PARAMETERS
+## tolerance for mz(dalton) rt(seconds) to match the standard in the compounds
+## list with the precursor MSpurity file
+tolmz <- args$tolmz
+tolrt <- args$tolrt
+##  relative intensity threshold
+seuil_ra <- args$seuil_ra
+## nb decimal for mz
+mzdecimal <- args$mzdecimal
+## r pearson correlation threshold between precursor and
+# #fragment absolute intensity
+r_threshold <- args$r_threshold
+## fragments are kept if there are found in a minimum number of scans
+min_number_scan <- args$min_number_scan
+for (i in seq_len(nrow(compounds))) {
+## loop execution for all compounds in the compounds file
+res_cor <- NULL
+res_cor <- extract_fragments(
+precursors = precursors,
+fragments = fragments,
+mzref = compounds[[2]][i],
+rtref = compounds[[3]][i],
+c_name = compounds[[1]][i],
+min_number_scan = min_number_scan,
+mzdecimal = mzdecimal,
+r_threshold = r_threshold,
+seuil_ra = seuil_ra,
+tolmz = tolmz,
+tolrt = tolrt
+)
+if (i == 1 & !is.null(res_cor)) {
+res_all <- res_cor
+} else if (!is.null(res_cor)) {
+res_all <- rbind(res_all, res_cor)
+}
+}
+if (is.null(res_all)) {
+stop("No result at all!")
+}
+write.table(
+x = res_all,
+file = args$output,
+sep = "\t",
+row.names = FALSE
+)
+}
+args <- optparse::parse_args(create_parser())
+sessionInfo()
+main(args)
+warnings()

Mercurial > repos > workflow4metabolomics > ms2snoop

comparison MS2snoop.R @ 0:91a3242fd67f draft