Mercurial > repos > metaboflow_cam > isolab
view isolab/pkgs/all_IsotopicLabelling.R @ 3:63b35931859a draft default tip
Uploaded
author | metaboflow_cam |
---|---|
date | Tue, 22 Oct 2019 04:25:30 -0400 |
parents | |
children |
line wrap: on
line source
#' wl-24-02-2018, Sat: from https://github.com/RuggeroFerrazza/IsotopicLabelling #' wl-25-02-2018, Sun: commence for debug and test. Notes: Use other R #' packages: ecipex, gsubfn, xcms and stringr #' wl-08-05-2018, Tue: create an remote repository: #' https://github.com/wanchanglin/isolab #' wl-23-03-2019, Sat: apply 'styler' to re-format #' wl-27-03-2019, Wed: call 'ecipex' directly #' ======================================================================== #' Main function of the package #' #' It computes the estimated X abundances of each sample, returning an #' object of the class \code{labelling}. #' #' @param peak_table A data.frame containing the integrated signals for the #' samples #' @param compound The chemical formula of the compound of interest #' @param charge Natural number, denoting the charge state of the target #' adduct (1,2,3,...). If not provided, it is 1 by default #' @param labelling Character, either "H" or "C", specifying which is the #' labelling element #' @param mass_shift Maximum shift allowed in the mass range #' @param RT Expected retention time of the compund of interest #' @param RT_shift Maximum shift allowed in the retention time range #' @param chrom_width Chromatographic width of the peaks #' @param initial_abundance Initial estimate for the abundance of the #' heaviest X isotope (the label) #' #' @details #' \itemize{ #' \item{peak_table:}{ The first two columns of \code{peak_table} represent #' the mass and the retention time of the peaks; the other columns #' represent peak intensities for each sample. The table can be obtained #' using the function \code{table_xcms}} #' \item{compound:}{ Character vector, where X has to represent the element #' with isotopic distribution to be fitted} #' \item{initial_abundance:}{ Numeric vector of the same length as the #' number of samples, with the initial estimate for the abundance of the #' heaviest X isotope (either ^2H or ^13C). If provided, number between 0 #' and 100. Otherwise, NA} #' } #' #' @return An object of the class \code{labelling}, which is a list #' containing the results from the fitting procedure: #' \item{compound}{ Character vector specifying the chemical formula of the compound of interest, #' with X being the element with unknown isotopic distribution (to be fitted)} #' \item{Best_estimate}{ Numeric vector representing the best estimated abundance #' of the heaviest X isotope (either ^2H or ^13C). Number between 0 and 100} #' \item{std_error}{ Numeric vector containing the standard errors of the estimates} #' \item{dev_percent}{ The percentage deviations of the fitted theoretical patterns to the provided experimental patterns} #' \item{x_scale}{ Vector containing the \emph{m/z} signals of the isotopic patterns} #' \item{y_exp}{ Matrix containing normalised experimental patterns, #' where for each sample the most intense signal is set to 100} #' \item{y_theor}{ Matrix of normalised fitted theoretical pattern (most intense signal set to 100 for each sample)} #' \item{warnings}{ Character vector containing possible warnings coming from the fitting procedure} #' #' #' @examples #' #' data(xcms_obj) #' peak_table <- table_xcms(xcms_obj) #' fitted_abundances <- main_labelling(peak_table, #' compound="X40H77NO8P", #' charge=1, #' labelling="C", #' mass_shift=0.05, #' RT=285, #' RT_shift=20, #' chrom_width=7, #' initial_abundance=NA) #' summary(object=fitted_abundances) #' plot(x=fitted_abundances, type="patterns", saveplots=FALSE) #' plot(x=fitted_abundances, type="residuals", saveplots=FALSE) #' plot(x=fitted_abundances, type="summary", saveplots=FALSE) #' save_labelling(fitted_abundances) #' grouped_estimates <- group_labelling(fitted_abundances, #' groups=factor(c(rep("C12",4), #' rep("C13",4)))) #' # Other possible lipid compounds include: #' # [PC34:1 + H]+. compound="X42H83NO8P", RT=475, chrom_width=10 #' # [TAG47:3 + NH4]+(a minor species). compound="X50H94NO6", #' # RT=891, chrom_width=7 #' @author Ruggero Ferrazza #' @keywords manip #' @export main_labelling <- function(peak_table, compound, charge=1, labelling, mass_shift, RT, RT_shift, chrom_width, initial_abundance=NA){ #' Get some useful isotopic information, to be used in the coming #' functions info <- isotopic_information(compound, charge, labelling) #' Extract one pattern for each sample (each column of the peak_table data #' frame) experimental_patterns <- isotopic_pattern(peak_table, info, mass_shift, RT, RT_shift, chrom_width) #' For each extracted pattern, find the X isotopic distribution that #' better fits the experimental data fitted_abundances <- find_abundance(patterns=experimental_patterns, info, initial_abundance, charge) return(fitted_abundances) } #' ======================================================================== #' Get useful isotopic information #' #' This function gathers essential isotopic information required by the #' other functions of the \code{\link{IsotopicLabelling}} package. #' #' @param compound Character vector specifying the chemical formula of the #' compound of interest, with X being the element with unknown isotopic #' distribution (to be fitted) #' @param charge Natural number, denoting the charge state of the target #' adduct (1,2,3,...). If not provided, it is 1 by default #' @param labelling Character, either "H" or "C", specifying the labelling #' element #' #' @return A list with the following elements: #' \item{compound}{The same as input} #' \item{target}{Named vector with the exact masses of all the possible #' isotopologues arising from the labelling isotope. M+0 is the #' monoisotopic mass (sum of the masses of the atoms using the lightest #' isotope for each element, X included); in M+1 one light isotope is #' replaced by its heaviest counterpart, and so forth} #' \item{isotopes}{Table containing the natural isotopic abundances of the #' elements present in compound (numbers between 0 and 1). #' The two isotopes of element X are given NA value} #' \item{nX}{The number of X atoms. In other words, the number of atoms with #' unknown isotopic distribution} #' \item{nTOT}{The total number of atoms of the labelling element (either #' H+X or C+X)} #' #' @details The specified compound is not the neutral molecular species of #' interest, but the adduct observed by ESI-MS (such as protonated or #' sodiated species). In the chemical formula, the element with unknown #' abundance should be denoted by X. For example, the proton adduct of TAG #' 52:2, C55H103O6, should be written X55H103O6 for ^13C labelling #' experiments, and C55X102HO6 for ^2H labelling experiments. Note that in #' this last case only 102 hydrogen atoms have unknown isotopic #' distribution, since the one giving rise to the adduct comes from the #' solvent, and is considered to have fixed natural abundance. #' #' @export #' #' @examples #' info <- isotopic_information(compound="X40H77NO8P", charge=1, labelling="C") #' # This is the case for [PC 32:2+H]+ in a ^13C-labelling experiment #' @author Ruggero Ferrazza #' @keywords manip #' #' ------------------------------------------------------------------- #' wl-20-02-2018, Tue: `nistiso` from package `ecipex` #' A data frame giving the masses, standard isotopic abundances and nucleon #' numbers of most chemical elements isotopic_information <- function(compound, charge=1, labelling){ #' Check that labelling is correct if (labelling !="H" & labelling !="C") stop("Check the labelling character: it should be either H or C") #' Introduce X in the isotopes data isotopes <- nistiso[,1:3] # wl-20-02-2018, Tue: global variable from `ecipex` X_new <- isotopes[which(isotopes$element==labelling),] X_new$element <- "X" isotopes <- rbind(isotopes, X_new) #' Compute the mass difference between heavier and lighter isotope mass_diff <- abs(diff(X_new[,"mass"]))/charge #' In isotopes data frame keep only the elements of interest DF <- strapply(compound, #' wl-26-03-18: from package 'gsubfn' "([A-Z][a-z]*)(\\d*)", ~ c(..1, if (nchar(..2)) ..2 else 1), simplify = ~ as.data.frame(t(matrix(..1, 2)), stringsAsFactors = FALSE)) DF[[2]] <- as.numeric(DF[[2]]) isotopes <- isotopes[isotopes$element %in% DF[,1],] #' Compute the exact mass of the species of interest (suppose that X has natural abundance) DF[[3]] <- apply(DF, 1, function(x){ a <- which(x[1]==isotopes$element) a <- a[which.max(isotopes$abundance[a])] return(isotopes$mass[a]) }) exact_mass <- sum(DF[[2]]*DF[[3]])/charge #' Set the X abundance to be unknown isotopes$abundance[which(isotopes$element == "X")] <- NA row.names(isotopes) <- seq(nrow(isotopes)) #' Extract total number of atoms of the element being labelled nTOT <- DF[which(DF[,1]==labelling),2] if (length(nTOT)==0) nTOT <- 0 nX <- DF[which(DF[,1]=="X"),2] if (length(nX)==0) nX <- 0 nTOT <- nTOT + nX #' Create the target vector, containing the exact masses of all the #' possible isotopic variants arising from X #' Lowest mass: 2 mass units below the monoisotopic mass #' Highest mass: 2 mass units above the mass corresponding to all X atoms #' having been labelled with the heaviest isotope target <- round(seq(from=exact_mass-2*mass_diff, by=mass_diff, length=nX+5), digits=4) names(target) <- c("M-2", "M-1", paste("M+", 0:(nX+2), sep="")) return(list(compound=compound, isotopes=isotopes, target=target, nX=nX, nTOT=nTOT)) } #' ======================================================================== #' Extract experimental isotopic patterns from a table of MS peaks #' #' Function that extracts the experimental isotopic patterns of a specified #' compound from a data frame containing MS peak intensities or areas. #' #' @param peak_table Data frame of experimental MS peak intensities or areas #' (one column for each sample), with the first two columns representing #' \emph{m/z} and retention times of the peaks #' @param info Named list containing isotopic information, output of the #' \code{\link{isotopic_information}} function #' @param mass_shift Maximum difference between theoretical and experimental #' mass. In other words, the expected mass accuracy #' @param RT Expected retention time of the compound of interest #' @param RT_shift Maximum difference between expected and experimental #' retention time of the peaks #' @param chrom_width An estimate of the chromatographic peak width #' #' @return A matrix of extracted experimental isotopic patterns (one column #' for each sample), with the first two columns representing the exact #' \emph{m/z} and the retention times of the peaks #' #' @details The table can be obtained from an \code{xcmsSet} object (output #' of the \code{xcms} R package) through the \code{\link{table_xcms}} #' function. #' #' #' @export #' #' @examples #' \dontrun{ #' experimental_patterns <- isotopic_pattern(peak_table, info, mass_shift=0.05, #' RT=285, RT_shift=20, chrom_width=7) #' } #' #' @author Ruggero Ferrazza #' @seealso \code{\link{table_xcms}} , \code{\link{isotopic_information}} #' @keywords manip isotopic_pattern <- function(peak_table, info, mass_shift, RT, RT_shift, chrom_width){ tmp_list <- lapply(info$target, function(x){ ind <- which( (abs(peak_table$mz - x) < mass_shift) & (peak_table$rt < (RT + RT_shift) ) & (peak_table$rt > (RT - RT_shift) ) ) return(data.frame(ind=ind, rt=peak_table[ind,"rt"])) }) #' Extract the retention times of all the peaks rt_overall <- sort(unique(unlist(lapply(tmp_list, function(x){x$rt}), use.names=F))) rt_grouped <- apply(abs(outer(rt_overall,rt_overall,'-')), 2, function(u) list(rt_overall[u<=chrom_width])) rt_grouped <- unique(lapply(rt_grouped, "[[", 1)) rt_candidates <- sapply(rt_grouped, mean) rt_best <- rt_candidates[which.min(abs(rt_candidates - RT))] #' Define the matrix where to put the signals patterns <- matrix(0, nrow=length(info$target), ncol=(ncol(peak_table))) row.names(patterns) <- names(info$target) colnames(patterns) <- colnames(peak_table) for (i in 1:length(info$target)){ ind <- which( (abs(peak_table$mz - info$target[i]) < mass_shift) & (abs(peak_table$rt - rt_best)<= chrom_width) ) if (length(ind)>=1){ ind <- ind[which.min(abs(peak_table$rt[ind] - rt_best))] patterns[i,] <- as.numeric(peak_table[ind,]) } } #' Check that the most intense signals do not come from M-2 or M-1 (which #' could arise from the same species as the target, with one more #' unsaturation) If so, "delete" the whole experimental pattern if (ncol(patterns)>=3){ max_pos <- apply(patterns[,-c(1,2)], 2, which.max) patterns[,c(F,F,max_pos <=2)] <- 0 } patterns[is.na(patterns)] <- 0 #' Cut out the first two masses (M-2 and M-1) patterns <- patterns[-c(1,2),] #' Check that each pattern has at least two signals different from 0, otherwise set all to 0 ind_pat <- apply(patterns[,-c(1,2)], 2, function(c) sum(c!=0)) > 1 patterns[,c(F,F,!ind_pat)] <- 0 #' Add the exact masses to the patterns matrix patterns[,"mz"] <- info$target[-c(1,2)] patterns[ which(patterns[,"rt"]==0), "rt"] <- NA #' Return the obtained patterns return(patterns) } #' ======================================================================= #' Fit experimental isotopic patterns #' #' Function that takes each of the provided experimental MS isotopic #' patterns, and fits the best theoretical pattern that reproduces it #' through a weighted non-linear least squares procedure. #' #' #' @param patterns A matrix of experimental isotopic patterns (one column #' for each sample), with the first two columns representing \emph{m/z} #' and retention time of the corresponding peaks #' @param info Named list containing isotopic information, output of the #' \code{\link{isotopic_information}} function #' @param initial_abundance Either NA, or a numeric vector of length equal #' to the number of samples, with the initial guesses on the percentage #' isotopic abundance of the labelling isotope (denoted as X, it can be #' either ^2H or ^13C). If provided, numbers between 0 and 100 #' @param charge Natural number, denoting the charge state of the target #' adduct (1,2,3,...). If not provided, it is 1 by default #' #' @return An object of class \code{labelling}, #' which is a list containing the results of the fitting procedure: #' \item{compound}{Character vector specifying the chemical formula of the compound of interest, #' with X being the element with unknown isotopic distribution (to be fitted)} #' \item{best_estimate}{Numeric vector of length equal to the number of samples, #' containing the estimated percentage abundances of the labelling isotope X #' (either ^2H or ^13C). Numbers between 0 and 100} #' \item{std_error}{Numeric vector with the standard errors of the estimates, #' output of the \code{nls} fitting procedure} #' \item{dev_percent}{Numeric vector with the percentage deviations between best fitted and related experimental patterns} #' \item{x_scale}{Numeric vector containing the \emph{m/z} values relative to the signals of the experimental patterns} #' \item{y_exp}{Matrix of normalised experimental isotopic patterns (one column for each sample). #' The most intense signal of each pattern is set to 100} #' \item{y_theor}{Matrix of normalised fitted theoretical isotopic patterns (one column for each sample). #' The most intense signal of each pattern is set to 100} #' \item{residuals}{Matrix of residuals: each column is the difference between experimental and best fitted theoretical patterns} #' \item{warnings}{Character vector with possible warnings from the \code{nls} fitting procedure} #' #' @export #' #' @examples #' \dontrun{ #' fitted_abundances <- find_abundance(patterns, info, initial_abundance=NA, charge=1) #' } #' #' @author Ruggero Ferrazza #' @seealso \code{\link{isotopic_information}} find_abundance <- function(patterns, info, initial_abundance=NA, charge=1){ tmp_results <- list() #' ======================================================================== analysis_X <- function(pattern, info, initial_ab=NA, charge=1){ #' Create a vector of masses target <- info$target[-c(1,2)] #' Create the list to return in case of errors error_list <- list(compound = info$compound, best_estimate = NA, std_error = NA, dev_percent = NA, x_scale = target, y_exp = pattern/max(pattern)*100, y_theor = rep(NA, times = length(pattern)), residuals = rep(NA, times = length(pattern)), warnings = "An error occurred") if (sum(pattern)==0) return(error_list) #' Normalise the experimental pattern (max intensity = 100) pattern <- pattern/max(pattern)*100 #' Find and store the mass of the most intense signal mass_max <- target[which.max(pattern)] #' First, rough estimate of the X abundance (either 2H or 13C), using #' mass_max and the exact mass If the user inserts a first estimate #' (initial_abundance), skip this step if (is.na(initial_ab)) initial_ab <- unname(round(((mass_max - target[1])*charge)/info$nX, digits=3)) if (initial_ab <0) initial_ab <- 0 if (initial_ab >1) initial_ab <- 1 #' Fitting procedure to find the best estimate for the X isotopic #' abundance. The signals of the pattern are given weights proportional #' to the square root of their intensity, so as to give less importance #' to noise. Define a function of only one parameter, the abundance, #' which will have to be fitted by the nls function pattern_fit <- function(abundance) { pattern_from_abundance(abundance, info=info, charge=charge) } fit <- nls(formula = pattern ~ pattern_fit(abundance), start = list(abundance = initial_ab), control = list(maxiter = 50, tol = 5e-8, warnOnly = T), algorithm = "port", weights = sqrt(pattern), na.action = na.exclude, lower = 0, upper = 1) if (inherits(try(summary(fit), silent=TRUE),"try-error")) return(error_list) warnings <- fit$convInfo$stopMessage return(list(best_estimate = summary(fit)$coefficients[1]*100, std_error = summary(fit)$coefficients[2]*100, dev_percent = (sqrt(sum((summary(fit)$residuals)^2)/ sum(pattern^2))*100), x_scale = target, y_exp = pattern, y_theor = pattern_fit(summary(fit)$coefficients[1]), residuals = pattern - pattern_fit(summary(fit)$coefficients[1]), warnings = warnings)) } #' ======================================================================== for (i in 3:ncol(patterns)){ tmp_results[[i-2]] <- analysis_X(pattern=patterns[,i], info=info, initial_ab=initial_abundance[i-2]/100, charge=charge) } names(tmp_results) <- colnames(patterns[,-c(1,2)]) #' Create the output list from the results obtained best_estimate <- sapply(tmp_results, "[[", "best_estimate") std_error <- sapply(tmp_results, "[[", "std_error") dev_percent <- sapply(tmp_results, "[[", "dev_percent") x_scale <- patterns[,"mz"] y_exp <- sapply(tmp_results, "[[", "y_exp") y_theor <- sapply(tmp_results, "[[", "y_theor") residuals <- sapply(tmp_results, "[[", "residuals") warnings <- sapply(tmp_results, "[[", "warnings") results <- list(compound=info$compound, best_estimate=best_estimate, std_error=std_error, dev_percent=dev_percent, x_scale=x_scale, y_exp=y_exp, y_theor=y_theor, residuals=residuals, warnings=warnings) class(results) <- "labelling" return(results) } #' ======================================================================== #' Compute theoretical isotopic patterns #' #' Function that computes the theoretical pattern of the specified compound, #' given the abundance of the labelling isotope X. #' The function returns a vector of normalised theoretical intensities, #' where the maximum signal is set to 100; #' the related masses are in the "target" vector contained in the input list. #' #' @param abundance Isotopic abundance of the labelling isotope X (either #' ^2H or ^13C); number between 0 and 1 #' @param info Named list containing isotopic information, #' output of the \code{\link{isotopic_information}} function #' @param charge Natural number, denoting the charge state of the target #' adduct (1,2,3,...). If not provided, it is 1 by default #' #' @return A vector representing the normalised isotopic pattern of the #' compound of interest, corresponding to the specified isotopic #' distribution #' @export #' #' #' @author Ruggero Ferrazza #' @references The function makes use of the \code{\link[ecipex]{ecipex}} R package #' @seealso \code{\link{isotopic_information}}, \code{\link[ecipex]{ecipex}} #' @keywords manip #' #' pattern_from_abundance <-function(abundance, info, charge=1){ #' Get the table containing isotopic information isotopes <- info$isotopes #' Modify the isotopes table with the abundance specified by the user isotopes[which(isotopes$element=="X")[1],"abundance"] <- 1 - abundance isotopes[which(isotopes$element=="X")[2],"abundance"] <- abundance theoret_pattern <- as.matrix(ecipex(info$compound, isoinfo = isotopes, limit = 1e-12, id = FALSE, sortby = "mass")[[1]]) #' Correct masses for charge state theoret_pattern[,1] <- theoret_pattern[,1]/charge #' Group together signals coming from isotopologues with the same nucleon #' number, and assign them the proper position theoretical_pattern <- unlist(lapply(info$target[-c(1,2)], function(x){ ind <- which(abs(x - theoret_pattern[,1]) <0.2/charge) return(sum(theoret_pattern[ind,2])) })) #' Normalise the theoretical pattern theoretical_pattern <- theoretical_pattern/max(theoretical_pattern)*100 return(theoretical_pattern) } #' ======================================================================= #' Batch process a set of target analytes #' #' This function batch processes LC- or GC-MS data, analyzing the isotopic #' patterns of a set of target analytes specified by the user. As with other #' functions of the package, it also requires chromatogrpahic information. #' #' #' @param targets A data frame containing information on the target #' analytes. Columns 1 to 8 are: "name_compound" (the name of the target #' analytes), "compound", "charge", "labelling", "RT", "RT_shift", #' "chrom_width", "mass_shift" (the same parameters for #' \code{\link{main_labelling}} function). Columns from 9 onwards contain #' the initial estimates for the label abundances (one estimate for each #' sample). If not known, a single column of NA values can be entered #' @param groups A factor containing the name of the group of each sample #' analysed; The function will calculate summary statistics for the #' samples belonging to the same group #' @param plot_patterns,plot_residuals,plot_results Whether or not to plot #' the patterns, the residuals and a summary of the results. If so, pdf #' files are created in the working directory #' @param save_results Whether to save the results of the estimates. If so, #' *.csv files are generated #' #' @return batch_grouped_estimates A list as long as the number of target #' analytes, containing the summary of the fitted results (group #' estimates) #' #' @export #' #' @examples #' # Get the sample dataset #' data("xcms_obj") #' #' # Convert the MS data set #' peak_table <- table_xcms(xcms_obj) #' #' # Get the example data frame containing target abalytes #' data("targets") #' #' # Batch process the data #' batch_grouped_estimates <- batch_labelling(targets=targets, #' groups=factor(c(rep("C12",4), rep("C13",4))), #' plot_patterns=FALSE, plot_residuals=FALSE, plot_results=FALSE, save_results=FALSE) #' #' @author Ruggero Ferrazza #' #' @seealso \link{main_labelling}, \link{group_labelling}, #' \link{save_labelling}, \link{plot.labelling} #' -------------------------------------------------------------------- #' wl-20-02-2018, Tue: add 'peak_table' as argument. batch_labelling <- function(peak_table, targets, groups, plot_patterns=T, plot_residuals=F, plot_results=F, save_results=F){ #' attach targets data frame attach(targets) #' Batch process batch_grouped_estimates <- list() for (i in 1:length(compound)){ batch_fitted <- main_labelling(peak_table, compound=compound[i], charge=charge[i], labelling=labelling[i], mass_shift=mass_shift[i], RT=RT[i], RT_shift=RT_shift[i], chrom_width=chrom_width[i], initial_abundance=as.numeric(targets[i,8:ncol(targets)])) #' plot the patterns if (plot_patterns) plot(x=batch_fitted, type="patterns", saveplots=T) #' plot the residuals if (plot_residuals) plot(x=batch_fitted, type="residuals", saveplots=T) #' plot the overall results if (plot_results) plot(x=batch_fitted, type="summary", saveplots=T) #' save the results to a *.csv file if (save_results) save_labelling(batch_fitted) #' Group the samples and obtain grouped estimates batch_grouped_estimates[[i]] <- group_labelling(batch_fitted, groups=groups) } names(batch_grouped_estimates) <- name_compound detach(targets) return(batch_grouped_estimates) } #' ======================================================================= #' Compute a single estimated isotopic abundance for each sample group #' #' This function groups the fitted abundances in order to give a single #' estimated value for each sample group, with related standard error of the #' mean that takes into account both the errors relative to each estimate #' from the fitting procedure, and the variability across samples. #' #' @param fitted_abundances Object of class \code{labelling}. It contains #' the results of the isotopic pattern analysis @param groups A factor #' containing the name of the group of each sample analysed; The function #' will calculate summary statistics for the samples belonging to the same #' group #' #' @return A data frame containing the summary statistics calculated #' groupwise. For each row (a group), it details: \item{N}{The number of #' samples in that group} \item{Mean}{The averaged estimated percentage #' isotopic abundance of the labelling isotope} \item{SE mean}{The standard #' error of the mean} \item{t_crit}{The critical value for a 95\% confidence #' interval of the t distribution with N-1 degrees of freedom} \item{Lower #' 95\% CI}{The lower 95\% confidence interval value} \item{Upper 95\% #' CI}{The upper 95\% confidence interval value} #' #' @details For each group, the average is simply computed by considering #' that the obtained individual values are representative of the population. #' #' As for the standard deviations, they are obtained using the law of total #' variance: the overall variance in each group is the sum of two distinct #' contributions, the first one related to the uncertainties associated in #' each sample estimate, and the second one arising from the spread of the #' estimates (biological variability). #' #' @export #' #' @examples \dontrun{ grouped_estimates <- #' group_labelling(fitted_abundances, groups=factor(c(rep("CTRL",4), #' rep("TRTD",4)))) #'} #' #' @author Ruggero Ferrazza @keywords manip #' #' group_labelling <- function(fitted_abundances, groups){ #' Extract the estimated percentage abundances and the std errors of the #' fit estimates <- fitted_abundances$best_estimate std_err_fit <- fitted_abundances$std_error #' Remove NA's from the data ind_NA <- which(is.na(estimates) | is.na(std_err_fit)) if (length(ind_NA) !=0) { estimates <- estimates[-ind_NA] std_err_fit <- std_err_fit[-ind_NA] groups <- groups[-ind_NA] } #' Compute the average for each group avg <- tapply(estimates, groups, mean) #' Compute the variance due to biological variability var_biol <- tapply(estimates, groups, var) #' Compute the variance due to single-sample errors var_within <- tapply(std_err_fit, groups, function(x){1/length(x)*sum(x^2)}) #' Compute the total variance var_TOT <- var_biol + var_within #' Compute the std error of the MEAN N <- tapply(groups,groups,length) std_MEAN <- sqrt( var_TOT / N ) #' Compute the 95% Confidence intervals width <- tapply(groups, groups, function(x){ qt(.975, df=length(x)-1) }) Lower <- avg - width*std_MEAN Upper <- avg + width*std_MEAN #' Provide the output grouped_estimates <- data.frame(N, avg, std_MEAN, width, Lower, Upper); names(grouped_estimates) <- c("N", "Mean", "SE mean", "t_crit", "Lower 95% CI", "Upper 95% CI") return(grouped_estimates) } #' ======================================================================== #' Plot method for \code{labelling} objects #' #' Produces different types of summary plots for a \code{labelling} object. #' #' @param x Object of class \code{labelling} #' @param type The type of output produced. Available options are #' "patterns", "residuals", "summary" #' @param saveplots Should the plots be saved as a PDF file? #' @param ... Other parameters #' #' @return One or more plots #' @details The default (type 'patterns') plot shows, for each sample in the #' class \code{labelling} object, the normalized experimental pattern #' superimposed to its fitted theoretical pattern. By setting type to #' 'residuals', the function plots the residuals (the differences between #' experimental and best fitted theoretical patterns). Type 'summary' #' produces a summary plot showing the estimated percentage abundances #' with related standard errors. If \code{saveplot} is TRUE, the plots are #' saved to a PDF file in the working directory. #' @export #' #' @examples #' \dontrun{ #' plot(x=fitted_abundances, type="patterns", saveplots=TRUE) #' plot(x=fitted_abundances, type="residuals", saveplots=TRUE) #' plot(x=fitted_abundances, type="summary", saveplots=TRUE) #' } #' #' @author Ruggero Ferrazza #' @keywords hplot plot.labelling <- function(x, type="patterns", saveplots=F, ...){ fitted_abundances <- x plot.new() old.par <- par(no.readonly = T) sample_name <- names(fitted_abundances$best_estimate) if (type=="patterns"){ #' Plot the results if (saveplots==T) { pdf(paste(fitted_abundances$compound, "_Isotopic_Patterns", ".pdf", sep=""), width=6.5, height=3) par(mar=c(3,3,2.5,0.1), mgp=c(2,0.6,0)) } else par(mar=c(4,4.5,4,0.1), mfrow=c(3,1), ask=T) for (k in 1:length(sample_name)){ plot(fitted_abundances$x_scale, fitted_abundances$y_exp[,k], type="h", main=paste(sample_name[k], " , Compound: ", fitted_abundances$compound), xlab="Target mass", ylab="Normalised intensity", ylim=c(0,110), cex.main=1, ...) text=paste("Fitted X Abundance: (", sprintf("%1.3f", fitted_abundances$best_estimate[k]), "+/-", sprintf("%1.3f", fitted_abundances$std_error[k]), ")\ %") mtext(text, cex=0.8) points(fitted_abundances$x_scale, fitted_abundances$y_theor[,k], col=2, pch=16, cex=.5) #' col="red" points(fitted_abundances$x_scale[1], -2.5, pch=17, col="blue") legend("top", legend=c("Experimental pattern", "Theoretical pattern"), lty=c(1,0), pch=c(0,16), pt.cex=c(0,0.6), col=c(1,2), cex=0.7, horiz=TRUE) } } else if (type=="residuals") { #' Plot the residuals if (saveplots==T) { pdf(paste(fitted_abundances$compound, "_Residuals", ".pdf", sep=""), width=6.5, height=3) par(mar=c(3,3,2.5,0.1), mgp=c(2,0.6,0)) } else par(mar=c(4,4.5,4,0.1), mfrow=c(3,1), ask=T) for (k in 1:length(sample_name)){ plot(fitted_abundances$x_scale, fitted_abundances$residuals[,k], type="h", main=paste("Residuals for ", sample_name[k]), xlab="Target mass", ylab="Normalised intensity", cex.main=1, ...) text=paste("Fitted X Abundance: (", sprintf("%1.3f", fitted_abundances$best_estimate[k]), "+/-", sprintf("%1.3f", fitted_abundances$std_error[k]), ")\ %") mtext(text, cex=0.8) abline(h=0, col="gray") } } else if (type=="summary"){ if (saveplots==T) pdf(paste(fitted_abundances$compound, "_Summary", ".pdf", sep=""), width=10, height=7) par(mar=c(6.4,4.1,4.1,2.1)) plot(fitted_abundances$best_estimate, type="p", pch=16, cex=0.6, xaxt="n", main="Summary of the Estimated Abundances", ylab="Fitted X abundance [%]", xlab="", cex.main=1.2, ...) mtext(paste("Compound: ", fitted_abundances$compound)) axis(side=1, at=1:length(sample_name), labels=sample_name, las=2, cex.axis=0.7) segments(x0 = 1:length(sample_name), y0 = fitted_abundances$best_estimate - fitted_abundances$std_error, x1 = 1:length(sample_name), y1 = fitted_abundances$best_estimate + fitted_abundances$std_error, col = "gray") } if (saveplots==T) dev.off() else par(old.par) } #' ======================================================================== #' Export to csv #' #' Function that saves the obtained results to a csv file. #' #' @param fitted_abundances Object of class \code{labelling} #' @param path The directory where to save the csv file. #' If not specified, the results are saved in the working directory #' #' @return The "COMPOUND_Estimated_Abundances.csv" file, containing the #' results of the analysis. #' For each sample (one for each row) there are four columns: #' \enumerate{ #' \item The estimated percentage abundance of the labelling isotope (either #' ^2H or ^13C); #' \item The related standard error coming from the \code{nls} fitting #' procedure; #' \item The percentage deviation between theoretical and experimental #' isotopic patterns; #' \item The outcome message from the fitting procedure, to undersand #' whether there have been any convergence problems. #' } #' @export #' @author Ruggero Ferrazza #' #' #' @keywords IO #' @seealso \link{main_labelling} #' save_labelling <-function(fitted_abundances, path=getwd()){ table <- cbind(fitted_abundances$best_estimate, fitted_abundances$std_error, fitted_abundances$dev_percent, fitted_abundances$warnings) colnames(table) <- c("Best estimate [%]", "Standard Error [%]", "Percentage deviation [%]", "Fitting outcome messages/Warnings") filename <- paste(fitted_abundances$compound, "_Estimated_Abundances.csv", sep="") file <- paste(path, filename, sep="/") write.csv(x = table, file=file) } #' ======================================================================== #' Summary of the labelling fitting #' #' Function that produces a summary of the results from an object of class #' \code{labelling}. #' #' @param object Object of class \code{labelling}, output of either #' \code{\link{main_labelling}} or \code{\link{find_abundance}} functions #' @param ... Additional parameters #' #' @return \item{results}{Matrix containing a summary of the fitted results. #' It has two rows, the first containing the estimated percentage isotopic #' abundances of the labelling isotope X (^2H or ^13C), and the second one #' containing the standard errors from the fitting procedure} #' @export #' #' @author Ruggero Ferrazza #' #' #' @keywords manip #' summary.labelling <- function(object, ...){ fitted_abundances <- object best_est <- fitted_abundances$best_estimate std_error <- fitted_abundances$std_error results <- rbind(best_est, std_error) row.names(results) <- c("Best Estimate [%]", "Standard Error [%]") return(results) } #' ====================================================================== #' Process \code{xcmsSet} #' #' Function that properly converts an \code{xcmsSet} object, #' from package \code{xcms}, into a table of peaks. #' #' @param xcms_obj An xcmsSet object #' #' @return \item{peak_table}{Data frame extracted from the \code{xcmsSet} #' object. The first two columns represent mass and retention time of the #' related peaks} #' #' @note The output data frame, required by other functions of the #' \code{\link{IsotopicLabelling}} R package, can be obtained in a number of #' other independent ways, such as through proprietary software of the #' vendor of the MS instrument. #' #' @author Ruggero Ferrazza #' #' @examples #' data(xcms_obj) #' peak_table <- table_xcms(xcms_obj) #' #' @keywords manip #' @export #' ----------------------------------------------------------------------- #' wl-03-04-2018, Tue: should provide dot arguments for 'peakTable'. #' Actually this dot argument is for 'groupval' #' ----------------------------------------------------------------------- table_xcms <- function(xcms_obj){ #' Check that the file in input is an xcmsSet object if (class(xcms_obj) != "xcmsSet") stop("ERROR: The provided object is not an xcmsSet object") peak_table <- peakTable(xcms_obj) n_files <- length(sampnames(xcms_obj)) peak_table <- peak_table[,c(which(colnames(peak_table)=="mz" | colnames(peak_table)=="rt"), (ncol(peak_table)-n_files+1):ncol(peak_table))] return(peak_table) } #' ====================================================================== #' @title Example data frame for batch processing #' #' @description A data frame containing information on target analytes to #' batch process. The first 8 columns contain the same parameters to input #' when using \code{\link{main_labelling}} function. See the details in #' \code{\link{batch_labelling}} #' @author Ruggero Ferrazza #' @examples #' data(targets) #' "targets" #' ====================================================================== #' @title Example data set #' #' @description An "xcmsSet" object containing the results of the LC-MS #' analysis of 8 lipid extract obtained in a ^13C isotopic labelling #' experiment. #' The first 4 samples were obtained from cell cultures grown under normal #' conditions (natural ^13C abundance), whereas in the remaining 4 the cells #' were grown using uniformly-labelled 99\% ^13C glucose. @format an #' \code{xcmsSet} object from the package \code{xcms} #' @source Department of Biochemistry, University of Cambridge - UK #' @author Dr. Julian L Griffin and Dr. Nyasha Munjoma #' @examples #' data("xcms_obj") #' "xcms_obj" #' ======================================================================== #' IsotopicLabelling-package #' #' The \code{IsotopicLabelling} package allows to analyse the isotopic #' patterns in MS data obtained in isotopic labelling experiments. From the #' experimental patterns, the package estimates the isotopic abundance of #' the stable isotope employed in the labelling experiment (either ^2H or #' ^13C) inside a specified compound. #' #' #' @section Details: Given a data frame of LC-MS or GC-MS peak intensities #' or areas (one column for each sample to analyse), the #' \code{\link{IsotopicLabelling}} package first extracts the isotopic #' patterns of the specified compound, and then performs an isotopic #' pattern analysis to estimate the isotopic abundance of the labelling #' isotope. This is performed through a weighted non-linear least squares #' fitting procedure, where the resulting estimate is the value for which #' the theoretical pattern best reproduces the experimental one. During #' the fitting, the experimental signals are given weights proportional to #' the square root of their intensity, to correct for the non uniform #' variance at different intensity levels. The theoretical patterns are #' computed using the \code{\link[ecipex]{ecipex}} R package. #' #' @section Block diagram: #' The isotopic pattern analysis can be divided into the following steps: #' \enumerate{ #' \item Starting from a class \code{xcmsSet} object (from the \code{xcms} R #' package), generate a data frame of peak signal intensities or areas, with #' each column corresponding to a sample. This step can be avoided if the #' data frame is already available (obtained by other means); #' \item Extract from the data frame the experimental isotopic patterns of #' the specified compound (one pattern for each sample). In the chemical #' formula of the compound, the element whose abundance is unknown is called #' "X"; #' \item Normalise the patterns and estimate the abundance of the label #' through a weighted non-linear least squares fitting procedure. #' \item Summarize the results. #' } #' #' @docType package #' @name IsotopicLabelling #' #' @import xcms #' @import ecipex #' @import stringr #' @import gsubfn #' #' @author Ruggero Ferrazza, Pietro Franceschi NULL #' ======================================================================== #' WL: Slightly modification of function 'plot.labelling' for Galaxy only. #' #' wl-22-05-2018, Tue: For details, see 'plot.labelling' #' ----------------------------------------------------------------------- plot.func <- function(x, type="patterns", ...){ sample_name <- names(x$best_estimate) if (type=="patterns"){ old.par <- par(mar=c(4,4.5,4,0.1), mfrow=c(3,1)) for (k in 1:length(sample_name)){ plot(x$x_scale, x$y_exp[,k], type="h", main=paste(sample_name[k], " , Compound: ", x$compound), xlab="Target mass", ylab="Normalised intensity", ylim=c(0,110), cex.main=1, ...) text=paste("Fitted X Abundance: (", sprintf("%1.3f", x$best_estimate[k]), "+/-", sprintf("%1.3f", x$std_error[k]), ")\ %") mtext(text, cex=0.8) points(x$x_scale, x$y_theor[,k], col=2, pch=16, cex=.5) #' col="red" points(x$x_scale[1], -2.5, pch=17, col="blue") legend("top", legend=c("Experimental pattern", "Theoretical pattern"), lty=c(1,0), pch=c(0,16), pt.cex=c(0,0.6), col=c(1,2), cex=0.7, horiz=TRUE) } } else if (type=="residuals") { old.par <- par(mar=c(4,4.5,4,0.1), mfrow=c(3,1)) for (k in 1:length(sample_name)){ plot(x$x_scale, x$residuals[,k], type="h", main=paste("Residuals for ", sample_name[k]), xlab="Target mass", ylab="Normalised intensity", cex.main=1, ...) text=paste("Fitted X Abundance: (", sprintf("%1.3f", x$best_estimate[k]), "+/-", sprintf("%1.3f", x$std_error[k]), ")\ %") mtext(text, cex=0.8) abline(h=0, col="gray") } } else if (type=="summary"){ old.par <- par(mar=c(6.4,4.1,4.1,2.1)) plot(x$best_estimate, type="p", pch=16, cex=0.6, xaxt="n", main="Summary of the Estimated Abundances", ylab="Fitted X abundance [%]", xlab="", cex.main=1.2, ...) mtext(paste("Compound: ", x$compound)) axis(side=1, at=1:length(sample_name), labels=sample_name, las=2, cex.axis=0.7) segments(x0 = 1:length(sample_name), y0 = x$best_estimate - x$std_error, x1 = 1:length(sample_name), y1 = x$best_estimate + x$std_error, col = "gray") } par(old.par) }