Mercurial > repos > metaboflow_cam > masspix
view masspix/all_massPix.R @ 2:b2f4fc0fc97d draft default tip
Uploaded
author | metaboflow_cam |
---|---|
date | Tue, 22 Oct 2019 05:44:45 -0400 |
parents | |
children |
line wrap: on
line source
#' wl-22-08-2017, Tue: first touch #' wl-23-08-2017, Wed: add some comments #' wl-24-08-2017, Thu: debug three functions: imageSlice, imagePca and #' cluster. #' wl-07-11-2017, Tue: Make some Changes #' - add 'lib_dir' for wrapper function 'massPix' #' - remove output in 'cluster' function #' - remove 'getwd' and use 'spectra_dir' in function 'subsetImage' #' wl-27-11-2017, Mon: More trivia changes #' wl-29-01-2018, Mon: modify and debug 'norm.median' #' wl-30-01-2018, Tue: modify and debug 'norm.standard'. #' wl-13-02-2018, Tue: modify and debug 'cluster' #' wl-25-03-2019, Mon: apply styler to reformat R codes #' library(calibrate) # for plot function 'textxy' only. #' library(rJava) # for execute Java #' library(Biobase) # for 'norm.median' #' ======================================================================== #' make library of lipid masses #' #' @param ionisation_mode Choose "positive" or "negative", will determine #' which lipid classes are in database #' @param sel.class A vector defining classes of lipids to be included in #' the library #' @param fixed Defines if one of the SN positions is fixed, default is F #' @param fixed_FA Defines the name of the fixed FA eg 16, 18.1, 20.4. #' @param lookup_lipid_class A data.frame defining lipid classes frm library #' @param lookup_FA A dataframe defining FAs from library #' @param lookup_element A data.frame defining elements from library #' @return Dataframe of masses for all combinations of FAs with chosen head #' groups #' @export #' #' wl-07-11-2017, Tue: should remove argument 'sel.class' #' makelibrary <- function(ionisation_mode, sel.class, fixed = F, fixed_FA, lookup_lipid_class, lookup_FA, lookup_element) { cat("\nMaking library of lipid masses...\n") if (ionisation_mode == "positive") { sel.class <- c( T, # TG T, # DG T, # PC F, # PA T, # PE T, # PS F, # PG F, # PI F, # PIP F, # PIP2 F, # PIP3 T, # LysoPC T, # DG-H20 T, # CE F, # FFA T, # SM T # Cer ) } if (ionisation_mode == "negative") { sel.class <- c( F, # TG F, # DG T, # PC T, # PA T, # PE T, # PS T, # PG T, # PI T, # PIP T, # PIP2 T, # PIP3 F, # LysoPC F, # DG-H20 F, # CE T, # FFA F, # SM T # Cer - ZLH changed this to true 20.03.18 ) } lookup_lipid_class <- cbind(lookup_lipid_class, sel.class) #' FAs to use in library FA_expt <- list( "10", "12", "14", "15", "16", "16.1", "17", "17.1", "18", "18.1", "18.2", "18.3", "20.3", "20.4", "20.5", "21", "22", "22.5", "22.6", "23", "24.1" ) library <- numeric() for (i in 1:nrow(lookup_lipid_class)) { if (lookup_lipid_class[i, "sel.class"] == T) { #' key variables rounder <- 3 # number of decimals the rounded masses are rounded to. #' lipidclass = "TG" lipidclass <- row.names(lookup_lipid_class[i, ]) #' determine how many FAs places to be used for combination and #' generate combination of FAs FA_number <- as.numeric(lookup_lipid_class[lipidclass, "FA_number"]) if (fixed == TRUE) FAnum <- FA_number - 1 else FAnum <- FA_number s1 <- combn(FA_expt, FAnum) #' if one place is fixed add this FA to the matrix if (fixed == TRUE) { s1 <- rbind(s1, "fixed" = fixed_FA) FAnum <- FAnum + 1 } #' if sn2 or sn3 does not have FA bind 'empty' FA channel. if (FAnum == 1) { s1 <- rbind(s1, sn2 <- vector(mode = "numeric", length = ncol(s1)), sn3 <- vector(mode = "numeric", length = ncol(s1))) FAnum <- FAnum + 2 } if (FAnum == 2) { s1 <- rbind(s1, sn3 <- vector(mode = "numeric", length = ncol(s1))) FAnum <- FAnum + 1 } #' label the matrix if (FAnum == 3) row.names(s1) <- c("FA1", "FA2", "FA3") #' add rows to matrix for massofFAs and formula massofFAs <- vector(mode = "numeric", length = ncol(s1)) s1 <- rbind(s1, massofFAs) formula <- vector(mode = "numeric", length = ncol(s1)) s1 <- rbind(s1, formula) #' row.names(s1) <-c("FA1", "FA2","FA3", "massofFAs") for (i in 1:ncol(s1)) { #' for 3 FAs if (FAnum == 3) { FA_1 <- as.character((s1[1, i])) FA_2 <- as.character((s1[2, i])) FA_3 <- as.character((s1[3, i])) s1["massofFAs", i] <- as.numeric((lookup_FA[FA_1, "FAmass"])) + as.numeric((lookup_FA[FA_2, "FAmass"])) + as.numeric((lookup_FA[FA_3, "FAmass"])) #' determine the formula temp_carbon <- as.numeric((lookup_FA[FA_1, "FAcarbon"])) + as.numeric((lookup_FA[FA_2, "FAcarbon"])) + as.numeric((lookup_FA[FA_3, "FAcarbon"])) temp_doublebond <- as.numeric((lookup_FA[FA_1, "FAdoublebond"])) + as.numeric((lookup_FA[FA_2, "FAdoublebond"])) + as.numeric((lookup_FA[FA_3, "FAdoublebond"])) s1["formula", i] <- paste(lipidclass, "(", temp_carbon, ":", temp_doublebond, ")", sep = "") } } #' calculate total mass totalmass <- vector(mode = "numeric", length = ncol(s1)) s1 <- rbind(s1, totalmass) for (i in 1:ncol(s1)) { s1["totalmass", i] <- as.numeric(s1["massofFAs", i]) + as.numeric(as.character(lookup_lipid_class[lipidclass, "headgroup_mass"])) - (as.numeric(lookup_lipid_class[lipidclass, "FA_number"]) * as.numeric(lookup_element["H", "mass"])) } #' make rows for charged lipids masses protonated <- vector(mode = "numeric", length = ncol(s1)) ammoniated <- vector(mode = "numeric", length = ncol(s1)) sodiated <- vector(mode = "numeric", length = ncol(s1)) potassiated <- vector(mode = "numeric", length = ncol(s1)) deprotonated <- vector(mode = "numeric", length = ncol(s1)) chlorinated <- vector(mode = "numeric", length = ncol(s1)) acetate <- vector(mode = "numeric", length = ncol(s1)) s1 <- rbind(s1, protonated, ammoniated, sodiated, potassiated, deprotonated, chlorinated, acetate) #' calculate charged lipids masses for (i in 1:ncol(s1)) { s1["protonated", i] <- round((as.numeric(s1["totalmass", i]) + as.numeric(lookup_element["H", "mass"])), digits = 4) s1["ammoniated", i] <- round((as.numeric(s1["totalmass", i]) + as.numeric(lookup_element["NH4", "mass"])), digits = 4) s1["sodiated", i] <- round((as.numeric(s1["totalmass", i]) + as.numeric(lookup_element["Na", "mass"])), digits = 4) s1["potassiated", i] <- round((as.numeric(s1["totalmass", i]) + as.numeric(lookup_element["K", "mass"])), digits = 4) s1["deprotonated", i] <- round((as.numeric(s1["totalmass", i]) - as.numeric(lookup_element["H", "mass"])), digits = 4) s1["chlorinated", i] <- round((as.numeric(s1["totalmass", i]) + as.numeric(lookup_element["Cl", "mass"])), digits = 4) s1["acetate", i] <- round((as.numeric(s1["totalmass", i]) + as.numeric(lookup_element["CH3COO", "mass"])), digits = 4) } #' make rows for rounded charged lipids masses round.protonated <- vector(mode = "numeric", length = ncol(s1)) round.ammoniated <- vector(mode = "numeric", length = ncol(s1)) round.sodiated <- vector(mode = "numeric", length = ncol(s1)) round.potassiated <- vector(mode = "numeric", length = ncol(s1)) round.deprotonated <- vector(mode = "numeric", length = ncol(s1)) round.chlorinated <- vector(mode = "numeric", length = ncol(s1)) round.acetate <- vector(mode = "numeric", length = ncol(s1)) s1 <- rbind(s1, round.protonated, round.ammoniated, round.sodiated, round.potassiated, round.deprotonated, round.chlorinated, round.acetate) #' calculate rounded charged lipids masses for (i in 1:ncol(s1)) { s1["round.protonated", i] <- round(as.numeric(s1["protonated", i]), digits = rounder) s1["round.ammoniated", i] <- round(as.numeric(s1["ammoniated", i]), digits = rounder) s1["round.sodiated", i] <- round(as.numeric(s1["sodiated", i]), digits = rounder) s1["round.potassiated", i] <- round(as.numeric(s1["potassiated", i]), digits = rounder) s1["round.deprotonated", i] <- round(as.numeric(s1["deprotonated", i]), digits = rounder) s1["round.chlorinated", i] <- round(as.numeric(s1["chlorinated", i]), digits = rounder) s1["round.acetate", i] <- round(as.numeric(s1["acetate", i]), digits = rounder) } library <- cbind(library, s1) } } return(library) } #' ======================================================================== #' extract all m/zs from image data #' #' @param files spectra raw file (consisting of .imzML and .ibd file); #' multiple files processing in devleopment, at the moment one file at a time #' @param spectra_dir Defines the path to the spectral files #' @param imzMLparse path to imzMLConverter #' @param thres.int Defines if intensity threshold, above which ions are retained #' @param thres.low Defines the minumum m/z threshold, above which ions will be retained #' @param thres.high Defines the minumum m/z threshold, below which ions will be retained #' @return List containing two elements. The first is a numeric vector #' containing of all unique masses, the second a list of height, width and #' height.width in pixels for each file. #' @export #' ----------------------------------------------------------------------- #' wl-24-11-2017, Fri: remove spectra_dir #' ----------------------------------------------------------------------- mzextractor <- function(files, imzMLparse, thres.int = 10000, thres.low = 200, thres.high = 1000) { cat("\nStarting mzextractor...\n") #' load parse and java .jinit() .jaddClassPath(path = imzMLparse) log <- vector() sizes <- list() all.mzs <- vector() for (a in 1:length(files)) { imzML <- J("imzMLConverter.ImzMLHandler")$parseimzML(files[a]) #' imzML <- J("imzMLConverter.ImzMLHandler")$parseimzML(paste(spectra_dir,substr(files[a],2,100),sep='')) width <- J(imzML, "getWidth") height <- J(imzML, "getHeight") size <- height * width sizes[[a]] <- list(height, width, size) #' plot(mzs, counts, "l") #' determine list of image wide m/z values -> store in vectore s.mz (unique and sorted) marker <- 0 for (i in 1:(height * 1)) { for (j in 1:(width * 1)) { #' i is width, j is height spectrum <- J(imzML, "getSpectrum", as.integer(j), as.integer(i)) mzs <- J(spectrum, "getmzArray") counts <- J(spectrum, "getIntensityArray") scan <- cbind("r.mz" = round(mzs, digits = 4), counts) f.scan <- scan[scan[, 2] > thres.int, , drop = FALSE] all.mzs <- rbind(all.mzs, f.scan[, 1:2]) #' all.mzs<-c(all.mzs, f.scan[,1]) } #' progress report if ((i / height) * 100 > marker + 5) { marker <- marker + 5 print(paste(marker, "% complete", sep = "")) } } print(paste("mzs from", size, "spectra in", files[a], "now read.", sep = " ")) } #' end of reading in files final.size <- 0 for (a in 1:length(sizes)) final.size <- final.size + as.numeric(sizes[[a]][3]) mz <- all.mzs[, 1] u.mz <- unique(mz) s.mz <- sort(u.mz) f.s.mz <- s.mz[s.mz > thres.low & s.mz < thres.high ] summary <- paste(length(f.s.mz), "unique ions retained within between", thres.low, "m/z and", thres.high, "m/z from", length(u.mz), "unique ions detected across all pixels.", sep = " " ) print(summary) #' output <- list(f.s.mz, sizes) output <- list(f.s.mz = f.s.mz, sizes = sizes) return(output) } #' ======================================================================== #' bin all m/zs by ppm bucket #' #' @param extracted Product of mzextractor:list containing matrix of m/zs #' and intensities in the first element. #' @param bin.ppm Defines width of the ppm bin. #' @return List containing two elements. The first is a matrix of extracted #' m/z and binned m/z for each extracted m/z, the second is a vector of all #' unique binned m/z values. #' @export peakpicker.bin <- function(extracted, bin.ppm = 12) { cat("\nStarting peakpicker.bin...\n") spectra <- cbind(round(extracted[[1]], digits = 4), "bin.med" = 0) marker <- 0 i <- 1 #' ptm <- proc.time() while (i <= nrow(spectra)) { #' group ions into common bin result <- spectra[ spectra[, 1] >= spectra[i, 1] & spectra[, 1] <= (spectra[i, 1] + (bin.ppm * spectra[i, 1]) / 1000000), ] if (class(result) == "matrix" & spectra[i, "bin.med"] == 0) { spectra[i:(i + nrow(result) - 1), 2] <- round(median(result[, 1]), digits = 4) i <- i + nrow(result) } #' if single ion in bin, label with it's m/z else { spectra[i, 2] <- spectra[i, 1] i <- i + 1 } #' progress report if (i / nrow(spectra) * 100 > marker + 1) { marker <- marker + 1 print(paste(marker, "% complete", sep = "")) } } #' print(proc.time() - ptm) bin.spectra <- spectra finalmz <- unique(spectra[, 2]) summary <- paste(nrow(spectra), "ions across all pixels binned to", length(finalmz), "bins.", sep = " " ) print(summary) #' log <- c(log,summary) #' peaks <- list(bin.spectra, finalmz) peaks <- list(bin.spectra = bin.spectra, finalmz = finalmz) return(peaks) } #' ======================================================================== #' Generate subset of first image file to improve speed of deisotoping. #' #' @param extracted Product of mzextractor:list containing matrix of m/zs #' and intensities in the first element. #' @param peaks Product of peakpicker.bin:list containing a vector of all #' unique binned m/z values in the 2nd element. #' @param percentage.deiso Defines the proportion of total pixels to select, #' at random from the first file to produce a subset of the image #' @param thres.int Defines if intensity threshold, above which ions are #' retained #' @param thres.low Defines the minumum m/z threshold, above which ions will #' be retained #' @param thres.high Defines the minumum m/z threshold, below which ions #' will be retained #' @param files a vector of file names #' @param spectra_dir Defines the path to the spectral files #' @param imzMLparse path to imzMLConverter #' @return matrix of subset of first image file. variables are binned #' peak m/z, observations are pixels (n = percentage.deiso x size) #' chosen at random from the first image file. #' @export #' #' ----------------------------------------------------------------------- #' wl-24-11-2017, Fri: remove spectra_dir #' ----------------------------------------------------------------------- subsetImage <- function(extracted, peaks, percentage.deiso = 3, thres.int = 10000, thres.low = 200, thres.high = 2000, files, imzMLparse) { cat("\nMaking image subset...\n") #' R parser .jinit() .jaddClassPath(path = imzMLparse) sizes <- extracted[[2]] bin.spectra <- peaks[[1]] finalmz <- peaks[[2]] file <- sample(1:length(sizes), 1, replace = F, prob = NULL) #' wl-07-11-2017, Tue: change #' imzML <- J("imzMLConverter.ImzMLHandler")$parseimzML(paste(getwd(),substr(files[file],2,100),sep='')) #' imzML <- J("imzMLConverter.ImzMLHandler")$parseimzML(paste(spectra_dir,substr(files[file],2,100),sep='')) #' wl-24-11-2017, Fri: change again imzML <- J("imzMLConverter.ImzMLHandler")$parseimzML(files[file]) subset <- sample(1:as.numeric(sizes[[file]][3]), as.numeric(sizes[[file]][3]) * (percentage.deiso / 100), replace = FALSE, prob = NULL ) temp.image <- cbind(as.numeric(finalmz), matrix(0, nrow = length(finalmz), ncol = length(subset) )) marker <- 0 for (n in 1:length(subset)) { remainder <- subset[n] rows <- floor(remainder / as.numeric(sizes[[file]][2])) cols <- remainder - (rows * as.numeric(sizes[[file]][2])) if (cols == 0) { remainder <- remainder - 1 rows <- floor(remainder / as.numeric(sizes[[file]][2])) cols <- remainder - (rows * as.numeric(sizes[[file]][2])) } #' i is height, j is width spectrum <- J(imzML, "getSpectrum", as.integer(cols), as.integer(rows + 1)) mzs <- J(spectrum, "getmzArray") counts <- J(spectrum, "getIntensityArray") scan <- cbind("r.mz" = round(mzs, digits = 4), counts) f.scan <- scan[scan[, 2] > thres.int & scan[, 1] > thres.low & scan[, 1] < thres.high, , drop = FALSE] if (length(f.scan) > 0) { for (k in 1:nrow(f.scan)) { bin.group <- bin.spectra[which(f.scan[k, 1] == bin.spectra[, 1], arr.ind = T), 2] if (length(bin.group) > 0) { temp.image[which(temp.image[, 1] == bin.group), n + 1] <- as.numeric(f.scan[k, "counts"]) } } } if ((n / length(subset) * 100) > marker + 5) { marker <- marker + 5 print(paste(marker, "% done", sep = "")) } } colnames(temp.image) <- c("mzbin", subset) return(temp.image) } #' ======================================================================== #' Filter to a matrix subset that includes variables above a threshold of #' missing values #' #' @param imagedata.in dataframe containing spectral informaion #' @param thres.filter Defines threshold for proportion of missing values #' (this is the step number, not the actual proportion) #' @param offset Defines the number of columns that preceed intensitiy values #' @param steps Sequence of values between 0 and 1 that define the #' thresholds of missing values to test #' @return Matrix of image data containing only variables with a missing #' value proporiton below thres.filter. #' @export filter <- function(imagedata.in, steps = seq(0, 1, 0.05), thres.filter = 11, offset = 4) { cat("\nStarting filter\n") zeros <- zeroperrow(steps, as.matrix(imagedata.in[, (offset + 1):ncol(imagedata.in)])) image.filtered <- imagedata.in[zeros[[thres.filter]], ] return(image.filtered) } #' ======================================================================== #' Generate subset of first image file to improve speed of deisotoping. #' #' @param ppm Tolerance (ppm) within which mass of isotope must be within #' @param no_isotopes Number of isotopes to consider (1 or 2) #' @param prop.1 Proportion of monoisotope intensity the 1st isotope #' intensity must not exceed #' @param prop.2 Proportion of monoisotope intensity the 2nd isotope #' intensity must not exceed #' @param peaks Product of peakpicker.bin:list containing a vector of all #' unique binned m/z values in the 2nd element. #' @param image.sub Product of subset.image: matrix containing intensity #' values to average for each binned m/z #' @param search.mod Search modifications T/F. #' @param mod modifications to search eg. c(NL=T, label=F, oxidised=T,desat=T) #' @param lookup_mod A dataframe defining modifications #' #' @return List of elements. Each element contains a dataframe with columns #' for m/z, mean intensity, deisotope annotation, modification annotation. #' Element 1, 2 and 3 have dataframes contain rows for all peaks, deisotoped #' and isotopes only. #' @export deisotope <- function(ppm = 3, no_isotopes = 2, prop.1 = 0.9, prop.2 = 0.5, peaks, image.sub, search.mod = F, mod = c(NL = T, label = F, oxidised = T, desat = T), lookup_mod) { cat("\nStarting deisotoping and difference scanning...\n") counts <- round(rowMeans(image.sub[, 2:ncol(image.sub)]), digits = 0) spectra <- cbind(peaks[[2]], counts, "", "") colnames(spectra) <- c("mz.obs", "intensity", "isotope", "modification") C13_1 <- 1.003355 C13_2 <- C13_1 * 2 #' set pmm window #' make column to store isotope annotation #' isotope counter k <- 0 m <- 0 #' run loop to find isotopes for each ion. for (i in (1:nrow(spectra) - 1)) { #' values of search mass <- as.numeric(spectra[i, 1]) intensity <- as.numeric(spectra[i, 2]) #' calculated values offset <- (ppm * mass) / 1000000 #' find isotope with ppm filter on isotpe search <- round((mass + C13_1), digits = 3) top <- search + offset bottom <- search - offset result <- spectra[as.numeric(spectra[, "intensity"]) <= (intensity * prop.1) & spectra[, 1] >= bottom & spectra[, 1] <= top & spectra[, "isotope"] == "", ] result <- rbind(result, blank1 = "", blank2 = "") if (no_isotopes == 2) { #' find isotope with ppm filter on isotpe search <- round((mass + C13_2), digits = 3) top <- search + offset bottom <- search - offset result_2 <- spectra[as.numeric(spectra[, "intensity"]) <= (intensity * prop.2) & spectra[, 1] >= bottom & spectra[, 1] <= top & spectra[, "isotope"] == "", ] result_2 <- rbind(result_2, blank1 = "", blank2 = "") } result_3 <- vector() if (search.mod != F) { for (j in 1:nrow(lookup_mod)) { if (mod[which(lookup_mod[j, "type"] == rownames(as.data.frame(mod)))] == T) { #' find isotope with ppm filter on isotpe search <- round(mass + lookup_mod[j, "mass"], digits = 3) top <- search + offset bottom <- search - offset if (0 != length(spectra[ spectra[, 1] >= bottom & spectra[, 1] <= top, ])) { temp.hits <- rbind(spectra[ spectra[, 1] >= bottom & spectra[, 1] <= top, ]) for (l in 1:nrow(temp.hits)) { res_3_temp <- c(temp.hits[l, 1], paste(as.vector(lookup_mod[j, "class"]), "(", row.names(lookup_mod[j, ]), ")", sep = "")) result_3 <- rbind(result_3, res_3_temp) } #' res_3_temp <- c(spectra[spectra[,1] >= bottom & spectra[,1] <= top,], paste(as.vector(lookup_mod[j,"class"]), "(", row.names(lookup_mod[j,]),")", sep="")) } } } } result_3 <- rbind(result_3, blank1 = "", blank2 = "") #' result<- as.matrix(result) #' add intensity filter #' iso_intensity <- (((mass-380)/8.8)/100)*intensity if (nrow(result) > 2) { k <- k + 1 spectra[i, "isotope"] <- paste(spectra[i, "isotope"], " ", "[", k, "]", "[M]", sep = "") for (j in 1:(nrow(result) - 2)) { indices <- which(spectra == result[j, 1], arr.ind = TRUE) spectra[indices[, "row"], "isotope"] <- paste(spectra[indices[, "row"], "isotope"], " ", "[", k, "]", "[M+1]", sep = "") } if (no_isotopes == 2 && nrow(result_2) > 2) { for (j in 1:(nrow(result_2) - 2)) { indices <- which(spectra == result_2[j, 1], arr.ind = TRUE) spectra[indices[, "row"], "isotope"] <- paste(spectra[indices[, "row"], "isotope"], " ", "[", k, "]", "[M+2]", sep = "") } } } if (nrow(result_3) > 2) { m <- m + 1 spectra[i, "modification"] <- paste(spectra[i, "modification"], " ", "Precursor[", m, "]", sep = "") for (j in 1:(nrow(result_3) - 2)) { indices <- which(spectra == result_3[j, 1], arr.ind = TRUE) spectra[indices[, "row"], "modification"] <- paste(spectra[indices[, "row"], "modification"], " ", "Fragment[", m, "] ", result_3[j, ncol(result_3)], sep = "") } } } allpeaks <- as.data.frame(spectra) deisotoped <- allpeaks[(grep("\\[M\\+", allpeaks$isotope, invert = T)), ] isotopes <- allpeaks[(grep("\\[M\\+", allpeaks$isotope, invert = F)), ] results <- list(allpeaks = allpeaks, deisotoped = deisotoped, isotopes = isotopes) summary <- paste(length(as.vector(deisotoped$mz.obs)), "monoisotopic peaks retained and", length(as.vector(isotopes$mz.obs)), "C13 isotopes discarded from", length(as.vector(allpeaks$mz.obs)), "detected ions", sep = " " ) print(summary) return(results) } #' ======================================================================== #' Performs identification of lipids and adducts #' #' @param ionisation_mode "positive" or "negative" determines which adducts #' to search for #' @param deisotoped Product of function 'deisotope': list with dataframe in #' the second element containing image data for deisotoped data #' @param adducts vector of adducts to be searched in the library of lipid #' masses #' @param ppm.annotate Defines if ppm threshold for which |observed m/z - #' theotical m/z| must be less than for annotation to be retained #' @param dbase The product of library - dataframe containing lipid massess. #' @return Character vector containing annotations #' @export #' #' wl-07-11-2017, Tue: should remove adducts in function arguments #' annotate <- function(ionisation_mode, deisotoped, #' adducts=c(H = T, NH4 = F, Na = T, K = T, dH = F, Cl = F, OAc = F), ppm.annotate = 10, dbase) { cat("\nStarting annotation\n") d.finalmz <- as.vector(deisotoped[[2]]$mz.obs) #' deisotoped s1 <- dbase spectra <- cbind(round(as.numeric(d.finalmz), digits = 3), d.finalmz) combined <- vector() sel.adducts <- vector() index <- 13 # offset to search only rounded masses in library if (ionisation_mode == "positive") { adducts <- c(H = T, NH4 = F, Na = T, K = T, dH = F, Cl = F, OAc = F) } if (ionisation_mode == "negative") { adducts <- c(H = F, NH4 = F, Na = F, K = F, dH = T, Cl = T, OAc = F) } for (a in 1:length(adducts)) { if (adducts[a] == T) sel.adducts <- c(sel.adducts, index + a) } for (i in 1:nrow(spectra)) { search <- as.numeric(spectra[i, 1]) offset <- (ppm.annotate * search) / 1000000 top <- search + offset bottom <- search - offset result <- which(s1[sel.adducts, ] >= bottom & s1[sel.adducts, ] <= top, arr.ind = TRUE) if (nrow(result) > 0) { for (j in 1:nrow(result)) { col <- result[j, "col"] row <- result[j, "row"] row <- sel.adducts[row] #' determine the adduct that was matched, summarising match information from library for matched mass (as 'data') #' determine which adduct if (row == "14") { adduct <- "protonated" name.adduct <- "H" } if (row == "15") { adduct <- "ammoniated" name.adduct <- "NH4" } if (row == "16") { adduct <- "sodiated" name.adduct <- "Na" } if (row == "17") { adduct <- "potassiated" name.adduct <- "K" } if (row == "18") { adduct <- "deprotonated" name.adduct <- "-H" } if (row == "19") { adduct <- "chlorinated" name.adduct <- "Cl" } if (row == "20") { adduct <- "acetate" name.adduct <- "OAc" } a.ppm <- round(abs(((as.numeric(spectra[i, 2]) - as.numeric(s1[adduct, col])) / as.numeric(spectra[i, 2])) * 1000000), digits = 1) #' make vector with summary of match and paired match data <- c( s1[row, col], s1[adduct, col], spectra[i, 2], a.ppm, s1["formula", col], name.adduct, s1["protonated", col], s1["FA1", col], s1["FA2", col], s1["FA3", col] ) #' make matrix of search results combined <- rbind(combined, unlist(data, use.names = F)) } } } if (length(combined) > 0) { colnames(combined) <- c( "mz.matched", "mz.matched.lib", "mz.observed", "ppm", "formula", "adduct", "mz.lib.protonated", "FA1", "FA2", "FA3" ) ids <- unique.matrix(combined[, c(3, 5, 6)]) annotations <- cbind(d.finalmz, "") for (i in 1:nrow(annotations)) { result <- which(ids[, 1] == annotations[i, 1], arr.ind = T) if (length(result) > 0) { for (j in 1:length(result)) { annotations[i, 2] <- paste(annotations[i, 2], "[", ids[result[j], "formula"], "+", ids[result[j], "adduct"], "]", sep = "") } } } #' image.roi<-cbind(image.roi[,2:ncol(image.roi)]) #' vresults<-cbind(annotations[,2], image.roi[,1],image.roi[,2:ncol(image.roi)]) #' colnames(image)<-c("annoation","modification","bin.mz", files) summary <- paste(length(annotations[annotations[, 2] != "", 2]), "from", length(as.vector(deisotoped[[2]]$mz.obs)), "monoisotopic peaks were annoated (using accuract mass) with a", ppm.annotate, "ppm tollerance", sep = " " ) print(summary) return(annotations[, 2]) } if (length(combined) == 0) { print("No annotations were made") } } #' ======================================================================== #' construct dataframe of image data #' #' @param extracted Product of mzextractor: list containing a list of the #' image diminsions in the 2nd element. #' @param deisotoped Product of deisotope #' @pgqaram peaks Product of peakpicker.bin:list containing a matrix of all #' peaks and the corresponding m/z bin in the first element. #' @param imzMLparse path to imzMLConverter #' @param spectra_dir Defines the path to the spectral files #' @param thres.int Defines if intensity threshold, above which ions are #' retained #' @param thres.low Defines the minumum m/z threshold, above which ions will #' be retained #' @param thres.high Defines the minumum m/z threshold, below which ions #' will be retained #' @param files a vector of file names #' @return Dataframe of image data. Variables are deisotoped binned m/z, #' observations are pixels. For image of width w and height h, the number of #' columns is w x h. The first w columns are from the first row (from left #' to right), the next w columns are the next row, from left to right and so #' on. List, first element is has column containing m/z values preceeding #' image data, second element has 4 columns preceeding image data which #' include m/z, annotation, isotope status, modification status. #' @export #' ----------------------------------------------------------------------- #' wl-24-11-2017, Fri: remove spectra_dir #' ----------------------------------------------------------------------- contructImage <- function(extracted, deisotoped, peaks, imzMLparse, thres.int = 10000, thres.low = 200, thres.high = 1000, files) { cat("\nStarting 'constructImage'...\n") .jinit() .jaddClassPath(path = imzMLparse) sizes <- extracted[[2]] final.size <- 0 for (a in 1:length(sizes)) final.size <- final.size + as.numeric(sizes[[a]][3]) bin.spectra <- peaks[[1]] d.finalmz <- as.vector(deisotoped[[2]]$mz.obs) image <- cbind( as.numeric(d.finalmz), matrix(0, nrow = length(d.finalmz), ncol = final.size) ) for (a in 1:length(files)) { height <- as.numeric(sizes[[a]][1]) width <- as.numeric(sizes[[a]][2]) #' wl-24-11-2017, Fri: remove spectra_dir here #' imzML <- J("imzMLConverter.ImzMLHandler")$parseimzML(paste(spectra_dir,substr(files[a],2,100),sep='')) imzML <- J("imzMLConverter.ImzMLHandler")$parseimzML(files[a]) marker <- 0 for (i in 1:height) { for (j in 1:width) { spectrum <- J(imzML, "getSpectrum", as.integer(j), as.integer(i)) mzs <- J(spectrum, "getmzArray") counts <- J(spectrum, "getIntensityArray") scan <- cbind("r.mz" = round(mzs, digits = 4), counts) f.scan <- scan[scan[, 2] > thres.int & scan[, 1] > thres.low & scan[, 1] < thres.high, , drop = FALSE] if (length(f.scan) > 0) { for (k in 1:nrow(f.scan)) { bin.group <- bin.spectra[which(f.scan[k, 1] == bin.spectra[, 1], arr.ind = T), 2] if (length(bin.group) > 0) { image[which(image[, 1] == bin.group), ((i - 1) * width) + (j + 1)] <- as.numeric(f.scan[k, "counts"]) } #' if(length(result)>0) image[result,((i-1)*width)+(j+1)]<- agg.mzs[k,"counts"] } } } #' progress report if ((i / height) * 100 > marker + 1) { marker <- marker + 1 print(paste(marker, "% done", sep = "")) } } } cat("\nimzMLcube constructed!\n") return(image) } #' ======================================================================== #' Normalise image data #' #' @param imagedata.in Product of 'constructImage' function. #' @param norm.type Mode of normalisation. Valid argument: "standards", #' "TIC", "median", "none". #' @param standards vector of row indices corresponding to variables that #' are standards. #' @param offset number of columns that preceed image data #' @return Normalised dataframe containing image data. #' @export normalise <- function(imagedata.in = image.ann, norm.type = "TIC", standards = NULL, offset = 4) { if (norm.type == "standards") { #' from standards images.f.n <- norm.standards(imagedata.in, offset, standards) imagedata.in <- images.f.n } if (norm.type == "TIC") { #' from TIC images.f.n <- norm.TIC(imagedata.in, offset) image imagedata.in <- images.f.n } if (norm.type == "median") { #' from median images.f.n <- norm.median(imagedata.in, offset) imagedata.in <- images.f.n } if (norm.type == "none") { #' no normalisation imagedata.in <- imagedata.in } return(imagedata.in) } #' ======================================================================== #' Normalise data to the median ion current #' #' @param imagedata.in Product of 'constructImage' function. #' @param offset number of columns that preceed image data #' @export #' ----------------------------------------------------------------------- #' wl-08-11-2017, Wed: rowMedians can be replaced by median. Example of #' rowMedians in `Biobase`: #' ----------------------------------------------------------------------- #' set.seed(1) #' x <- rnorm(n=234*543) #' x[sample(1:length(x), size=0.1*length(x))] <- NA #' dim(x) <- c(234,543) #' y1 <- rowMedians(x, na.rm=TRUE) #' y2 <- apply(x, MARGIN=1, FUN=median, na.rm=TRUE) #' stopifnot(all.equal(y1, y2)) #' x <- cbind(x1=3, x2=c(4:1, 2:5)) #' stopifnot(all.equal(rowMeans(x), rowMedians(x))) norm.median <- function(imagedata.in, offset) { if (F) { library(Biobase) medians.1 <- as.vector(rowMedians(t(as.matrix(imagedata.in[, (offset + 1):ncol(imagedata.in)], na.rm = T)))) #' wl-29-01-2018, Mon: The parentheses for `na.rm = T` is wrong. } else { #' wl-29-01-2018, Mon: Do not use `rowMedians` in `Biobase`. tmp <- t(as.matrix(imagedata.in[, (offset + 1):ncol(imagedata.in)])) medians <- apply(tmp, MARGIN = 1, FUN = median, na.rm = TRUE) medians <- as.vector(medians) } factor <- medians / mean(medians) image.norm <- cbind(imagedata.in[, 1:offset], t(t(imagedata.in[, (offset + 1):ncol(imagedata.in)]) / factor)) empty.spectra <- which(factor == 0, arr.ind = TRUE) if (length(empty.spectra) > 1) { for (i in 1:length(empty.spectra)) { for (j in 1:nrow(image.norm)) image.norm[j, empty.spectra[i] + offset] <- 0 } } image.norm <- cbind(imagedata.in[, 1:offset], image.norm[, (offset + 1):ncol(image.norm)]) #' hist(factor, breaks=100) image.norm } #' ======================================================================== #' Normalise data to standards #' @param imagedata.in Product of 'constructImage' function. #' @param offset number of columns that preceed image data #' @param standards vector of row indices corresponding to variables that are standards. #' @export #' wl-30-01-2018, Tue: modify and debug norm.standards <- function(imagedata.in, offset, standards = NULL) { #' norm.standards <- function(imagedata.in, offset, standards){ #' wl-30-01-2018, Tue: do not plot too many boxplots if (F) { for (i in 1:length(standards)) boxplot(as.vector(t(imagedata.in[standards[i], (offset + 1):length(imagedata.in)])), main = paste("distribution of standard", imagedata.in[standards[i], 1], ".", sep = " " ) ) } #' wl-30-01-2018, Tue: added. if (is.null(standards)) { standards <- 1:nrow(imagedata.in) } av <- mean(colMeans(imagedata.in[standards, (offset + 1):length(imagedata.in)])) norm <- t(imagedata.in[, (offset + 1):ncol(imagedata.in)]) / colMeans(imagedata.in[standards, (offset + 1):length(imagedata.in)]) norm <- norm * av norm.rep <- replace(norm, norm == "NaN", 0) norm.rep <- replace(norm.rep, norm == "Inf", 0) image.norm <- cbind(imagedata.in[, 1:offset], t(norm.rep)) image.norm } #' ======================================================================== #' Normalise data to the TIC #' @param imagedata.in Product of 'constructImage' function. #' @param offset number of columns that preceed image data #' @export norm.TIC <- function(imagedata.in, offset) { sums <- as.vector(colSums(imagedata.in[, (offset + 1):ncol(imagedata.in)], na.rm = T )) factor <- sums / mean(sums) image.norm <- cbind( imagedata.in[, 1:offset], t(t(imagedata.in[, 5:ncol(imagedata.in)]) / factor) ) empty.spectra <- which(factor == 0, arr.ind = TRUE) if (length(empty.spectra) > 0) { for (i in 1:length(empty.spectra)) { for (j in 1:nrow(image.norm)) image.norm[j, empty.spectra[i] + offset] <- 0 } } image.norm <- cbind( imagedata.in[, 1:offset], image.norm[, (offset + 1):ncol(image.norm)] ) #' hist(factor, breaks=100) return(image.norm) } #' ======================================================================== #' Remove outliers for image analysis #' #' @param x image data in #' @param na.rm remove missing values #' @param replace.1.min initial value to replace minimum values with #' @param replace.1.max initial value to replace maximum values with #' @return matrix of image data with outliers removed #' @export remove_outliers <- function(x, na.rm = TRUE, replace.1.min, replace.1.max) { qnt <- quantile(x, probs = c(.25, .75), na.rm = na.rm) H <- 1.5 * IQR(x, na.rm = na.rm) y <- x y[x < (qnt[1] - H)] <- replace.1.min y[x > (qnt[2] + H)] <- replace.1.max y[x < (qnt[1] - H)] <- min(y) y[x > (qnt[2] + H)] <- max(y) y } #' ======================================================================== #' Rescale image values #' #' @param x image date in #' @param scale range of scaled data #' @return matrix of scaled data #' @export rescale <- function(x, scale) { ((x - min(x)) / (max(x) - min(x))) * scale } #' ======================================================================== #' Normalise image data #' #' @param imagedata.in Product of 'constructImage' function. #' @param scale.type Mode of scaling Valid argument: "c", "cs", "none" for #' centre and center + pareto scaling, respectively . #' @param transform log transform data T/F #' @param offset number of columns that precede image data #' @return Centred, scaled, transformed dataframe containing image data. #' @export centreScale <- function(imagedata.in = image.ann, scale.type = "cs", transform = F, offset = 4) { #' matrix of non-transformed data if (transform == F) { matr <- as.matrix(imagedata.in[, (offset + 1):ncol(imagedata.in)]) } #' log transform data to remove heteroscedasticity if (transform == T) { matr <- as.matrix(log(imagedata.in[, (offset + 1):ncol(imagedata.in)] + 1)) #' log of 0 causes -inf values so +1..zeros (missing values) become 0 } matr[matr == 0] <- NA # replace zeros for NA so they are ommited from center and scaling if (scale.type == "c") { matr <- as.matrix(log(imagedata.in[, (offset + 1):ncol(imagedata.in)] + 1)) #' log of 0 causes -inf values so +1..zeros (missing values) become 0 } } if (scale.type == "cs") { matr <- t(scale(t(matr), center = TRUE, scale = apply(t(matr), 2, function(x) sqrt(sd(x, na.rm = TRUE))))) #' mean center and pareto scale } if (scale.type == "pareto") { matr <- t(scale(t(matr), center = FALSE, scale = apply(t(matr), 2, function(x) sqrt(sd(x, na.rm = TRUE))))) #' pareto scale only for slicing=T } if (scale.type == "none") { matr <- matr # no scaling } #' replace NA with value (in this case 0) matr[which(is.na(matr))] <- 0 imagedata.in <- cbind(imagedata.in[, 1:offset], matr) return(imagedata.in) } #' ======================================================================== #' determine the number of zeros per row in a matrix #' @param steps Sequence of values between 0 and 1 that define the #' thresholds of missing values to test #' @param matrix Matrix of image data #' @return List of row indices corresponing to variables that were retained #' at each thres.filter step. #' @export #' ----------------------------------------------------------------------- #' wl-25-11-2017, Sat: add plot.f zeroperrow <- function(steps, matrix, plot.f = FALSE) { #' filter summary for MVA indices <- list() results <- vector() counter <- 1 for (threshold in steps) { filter <- vector() for (i in 1:nrow(matrix)) { if ((length(which(matrix[i, ] < 1)) / ncol(matrix)) <= threshold) filter <- c(filter, i) } print(paste("Step ", counter, ": ", length(filter), " records at ", threshold, " threshold.", sep = "")) #' record the retained indices at each threshold indices[counter] <- list(filter) results <- c(results, length(matrix[filter, 1])) counter <- counter + 1 } if (plot.f) { plot(steps, results[1:(length(results))], ylab = "Number of ions", xlab = "% missing values", main = "Coverage of ions across selected pixels", type = "s" ) } #' results return(indices) } #' ======================================================================== #' Create heat map for PCA image #' #' Generate heat map based on spectral information. #' #' @param imagedata.in Dataframe containing image data. #' @param offset number of columns preceeding image data #' @param PCnum number of PCs to consider #' @param scale range of scale that intensity values will be scaled to. #' @param x.cood width of image. #' @param y.cood height of image. #' @param nlevels Graduations of colour scale. #' @param res.spatial spatial resolution of image #' @param rem.outliers Remove intensities that are outliers, valid arguments: "only", "true". #' @param summary T/F #' @param title show titles, T/F". #' @return heatmap image #' @export imagePca <- function(imagedata.in, offset, PCnum, scale, x.cood, y.cood, nlevels, res.spatial, summary, title = T, rem.outliers = TRUE) { #' res.spatial,summary, title=T, rem.outliers="only"){ pca <- princomp(t(imagedata.in[, (offset + 1):ncol(imagedata.in)]), cor = FALSE, scores = TRUE, covmat = NULL ) for (i in 1:PCnum) { imageSlice( row = i, imagedata.in = t(pca$scores), scale, x.cood, y.cood, nlevels, name = paste("PC", i, sep = ""), subname = "", offset = 0, res.spatial, rem.outliers, summary, title ) labs.all <- as.numeric(as.vector(imagedata.in[, 1])) perc <- 5 # percent of data to label y <- cut(pca$loadings[, i], breaks = c( -Inf, quantile(pca$loadings[, i], p = c(perc / 100)), quantile(pca$loadings[, i], p = c(1 - (perc / 100))), Inf ), labels = c("low", "mid", "long") ) labs <- labs.all labs[which(y == "mid")] <- "" plot( x = as.numeric(as.vector(imagedata.in[, 1])), y = pca$loadings[, i], type = "n", main = paste("PC", i, " loadings", sep = ""), xlab = "m/z", ylab = "p[4]" ) lines( x = as.numeric(as.vector(imagedata.in[, 1])), y = pca$loadings[, i], type = "h" ) textxy( X = as.numeric(as.vector(imagedata.in[, 1])), Y = pca$loadings[, i], labs = labs, cx = 0.5, dcol = "black", m = c(0, 0) ) #' wl-23-08-2017, Wed: 'textxy' (from 'calibrate') places labels in a plot } } #' ======================================================================== #' Generate heat map based on spectral information. #' #' @param row indices of row which corresponds to spectral data to plot; #' use image.norm_short.csv to hel identify row number of interest #' @param imagedata.in Dataframe containing image data #' @param scale range of scale that intensity values will be scaled to #' @param x.cood width of image #' @param y.cood height of image #' @param nlevels Graduations of colour scale #' @param name main name of image #' @param subname sub name of image #' @param res.spatial spatial resolution of image #' @param offset number of columns preceding image data #' @param rem.outliers Remove intensities that are outliers, valid arguments: #' "only" (without outliers only) or T (with and without outliers0) #' @param summary T/F #' @param title show titles, T/F #' @return heatmap image #' @export #' #' ------------------------------------------------------------------------ #' wl-23-08-2017, Wed: 'filled.contour' {graphics}: This function produces a #' contour plot with the areas between the contours filled in solid color #' (Cleveland calls this a level plot). A key showing how the colors map to #' z values is shown to the right of the plot. #' ------------------------------------------------------------------------ #' wl-19-11-2017, Sun: #' 1.) @param rem.outliers Remove outliers or not #' 2.) change 'summary' condition. Default is FALSE #' ------------------------------------------------------------------------ imageSlice <- function(row, imagedata.in, scale, x.cood, y.cood, nlevels, name, subname, offset, res.spatial, rem.outliers, summary, title = T) { slice <- as.numeric(as.vector(as.matrix(imagedata.in[row, (1 + offset):ncol(imagedata.in)]))) #' wl-24-08-2017: take intensity for one m/z. (1+offset):ncol(imagedata.in) #' is for only numeric values. #' ----------------------------------------------------------------------- #' if(rem.outliers != "only"){ if (!rem.outliers) { #' wl-19-11-2017, Sun: changed #' if(summary==F){ if (summary) { #' wl-19-11-2017, Sun: changed. Should be TRUE boxplot(slice, main = (paste("distribution with 'outliers' for", imagedata.in[row, 1], ".", sep = " ")), cex.main = 1 ) hist(slice, breaks <- seq(min(slice), max(slice), (max(slice) - min(slice)) / 100), prob = T, main = paste("distribution with 'outliers' for", imagedata.in[row, 1], ".", sep = " "), cex.main = 1 ) lines(density(slice), col = "red", lwd = 2) } rescaled <- rescale(slice, scale) section <- t(matrix(rescaled, nrow = y.cood, ncol = x.cood, byrow = T)) filled.contour( x = seq(from = 1, to = x.cood, length = x.cood) * res.spatial, y = seq(from = 1, to = y.cood, length = y.cood) * res.spatial, z = section, nlevels = 50, axes = TRUE, asp = 1, plot.title = title(xlab = "x (micrometers)", ylab = "y (micrometers)", cex.axis = 0.5), color.palette = topo.colors ) if (title == T) { title(main = paste("with outliers", name, sep = " "), cex.main = 1) } title(sub = subname, cex.sub = 1 / 2) } #' ----------------------------------------------------------------------- #' if(rem.outliers == T || rem.outliers == "only"){ if (rem.outliers) { #' wl-19-11-2017, Sun: changed slice <- remove_outliers(x = slice, na.rm = TRUE, replace.1.min = 0, replace.1.max = 0) #' if(summary==F){ if (summary) { #' wl-19-11-2017, Sun: changed. Should be TRUE boxplot(slice, main = (paste("distribution without 'outliers' for", imagedata.in[row, 1], ".", sep = " ")), cex.main = 1) hist(slice, breaks <- seq(min(slice), max(slice), (max(slice) - min(slice)) / 100), prob = T, main = paste("distribution without 'outliers' for", imagedata.in[row, 1], ".", sep = " "), cex.main = 1 ) lines(density(slice), col = "red", lwd = 2) } rescaled <- rescale(slice, scale) section <- t(matrix(rescaled, nrow = y.cood, ncol = x.cood, byrow = T)) filled.contour( x = seq(from = 1, to = x.cood, length = x.cood) * res.spatial, y = seq(from = 1, to = y.cood, length = y.cood) * res.spatial, z = section, nlevels = 50, axes = TRUE, asp = 1, plot.title = title(xlab = "x (micrometers)", ylab = "y (micrometers)", cex.axis = 0.5), color.palette = topo.colors ) if (title == T) { title(main = paste("without outliers", name, sep = " "), cex.main = 1) } title(sub = subname, cex.sub = 1 / 2) } } #' ======================================================================== #' k-means clustering for imaging processing #' @param cluster.type Currently only "kmeans" suported #' @param imagedata.in Dataframe containing image data #' @param offset columns preceding data #' @param res.spatial spatial resolution of the image #' @param width width of image; x.cood #' @param height height of image; y.cood #' @param clusters number of desired clusters #' @return clustered images and cluster centers; writes csv files for #' cluster centers #' @export #' #' ------------------------------------------------------------ #' wl-13-02-2018, Tue: return intensity for saving option cluster <- function(cluster.type = cluster.type, imagedata.in = imagedata.in, offset = offset, res.spatial = res.spatial, width = x.cood, height = y.cood, clusters = clusters) { #' to do k-means clustering based on spectral similarity of pixels if (cluster.type == "kmeans") { k <- kmeans(t(imagedata.in[, (offset + 1):ncol(imagedata.in)]), clusters) #' rearrange your matrix to fit in image space, where nrow=y and ncol=x #' transpose for the heatmap k.matrix <- data.frame() k.matrix <- matrix(k$cluster, nrow = height, ncol = width, byrow = T) t.k.matrix <- t(k.matrix) #' plot the heatmap, choose colour scheme and colour according to cluster class filled.contour( x = seq(1, width, length = width) * res.spatial, y = seq(1, height, length = height) * res.spatial, z = t.k.matrix, col = grey(seq(0, 1, length = 10)), #' color.palette=topo.colors, axes = TRUE, nlevels = 12, #' col=rainbow(10, alpha=0.5), asp = 1, plot.title = title(xlab = "x (micrometers)", ylab = "y (micrometers)", cex.axis = 0.5), key.title = title("Cluster") ) title(main = paste("k-means clustering -", clusters, "clusters", sep = " "), cex.main = 1) #' ------------------------------------------------------------ #' wl-13-02-2018, Tue: get intensity matrix colnames(k$centers) <- imagedata.in[, 1] mz <- as.numeric(colnames(k$centers)) Intensity <- k$centers rownames(Intensity) <- paste0("Cluster", 1:nrow(Intensity)) for (i in 1:clusters) { #' i = 2 #' wl-13-02-2018, Tue: move out this loop #' colnames(k$centers) <- imagedata.in[,1] #' mz <- as.numeric(colnames(k$centers)) #' Intensity <- k$centers[i,] #' plot(mz, Intensity, "h") plot(mz, Intensity[i, ], "h") abline(0, 0) title(main = paste("Cluster", i, sep = " ")) #' wl-24-08-2017: plot only this cluster and disable others using level #' plot. one.cluster <- t.k.matrix one.cluster[one.cluster[, ] != i] <- 0 #' wl-24-08-2017: remove other clusters. filled.contour( x = seq(1, width, length = width) * res.spatial, y = seq(1, height, length = height) * res.spatial, z = one.cluster, #' col=grey(seq(0,1,length=2)), col = c("black", sample(rainbow(10), 1)), #' col=c("black", "purple"), axes = TRUE, nlevels = 2, asp = 1, plot.title = title(xlab = "x (micrometers)", ylab = "y (micrometers)", cex.axis = 0.5), ) title(main = paste("k-means clustering -", "Cluster", i, sep = " "), cex.main = 1) #' wl-07-11-2017, Tue: any output should be outside function #' write.csv(Intensity, paste("Cluster_",i,".csv")) } Intensity #' wl-13-02-2018, Tue: return Intensity } } #' ======================================================================== #' wrapper to process image data #' #' @param process T to process data from imzML, F to skip processing #' @param pca T to conduct PCA image analysis #' @param slice T to generate two dimensional image of one ion defined in row #' @param cluster.k T to perform clustering #' @param ionisation_mode Choose "positive" or "negative"; determines which lipid classes to use when building database #' @param thres.int Defines if intensity threshold, above which ions are retained #' @param thres.low Defines the minumum m/z threshold, above which ions will be retained #' @param thres.high Defines the minumum m/z threshold, below which ions will be retained #' @param bin.ppm Mass accuracy for binning between spectra #' @param thres.filter Defines threshold for proportion of missing values (this is the step number, not the actual proportion) #' @param ppm.annotate Defines if ppm threshold for which |observed m/z - theotical m/z| must be less than for annotation to be retained #' @param res.spatial spatial resolution of image #' @param PCnum number of PCs to calculate #' @param row indices of row which corresponds to spectral data to plot in image slice - use image.norm_short.csv to find row number #' @param cluster.type At the moment only supports "kmeans" #' @param clusters Number of clusters to use for clustering #' @param ppm Tolerance (ppm) within which mass of isotope must be within . #' @param no_isotopes Number of isotopes to consider (1 or 2) #' @param prop.1 Proportion of monoisotope intensity the 1st isotope intensity must not exceed #' @param prop.2 Proportion of monoisotope intensity the 2nd isotope intensity must not exceed #' @param search.mod Search modifications T/F. #' @param mod modifications to search eg. c(NL=T, label=F, oxidised=T,desat=T) #' @param lookup_mod A dataframe defining modifications #' @param adducts vector of adducts to be searched in the library of lipid masses #' @param sel.class A vector defining classes of lipids to be included in the library #' @param fixed Defines if one of the SN positions is fixed, default is F. #' @param fixed_FA Defines the name of the fixed FA is fixed is T, e.g. 16, 16.1, 18.2. #' @param lookup_lipid_class A data.frame defining lipid classes #' @param lookup_FA A dataframe defining FAs #' @param lookup_element A dataframe defining elements #' @param files a vector of file names; currently only supports processing one file at a time #' @param spectra_dir Defines the path to the spectral files, #' @param imzMLparse path to imzMLConverter #' @param percentage.deiso Defines the proportion of total pixels to select, at random from the first file to produce a subset of the image #' @param steps Sequence of values between 0 and 1 that define the thresholds of missing values to test #' @param imagedata.in Dataframe containing image #' @param norm.type "TIC" or "median" or "standards" or "none", recommend "TIC" #' @param standards default is NULL, vector of row indices corresponding to variables that are standards #' @param scale.type options are "cs" center and pareto scale, "c" center only, "pareto" pareto only and "none" #' @param scale range of scale that intensity values will be scaled to #' @param transform T/F to perform log transform #' @param x.cood width of image #' @param y.cood height of image #' @param nlevels Graduations of colour scale. #' @param name main name of image. #' @param subname sub name of image #' @param rem.outliers Choose "only" to show only images after removing outliers, T for both with and without outliers #' @param summary T/F to show extra information #' @param title show titles, T/F #' @param offset number of columns of text preceding data #' #' @return Dataframe of annotated image data. Variables are deisotoped #' binned m/z, observations are pixels. For image of width w and height h, #' the number of columns is w x h. The first w columns are from the first #' row (from left to right), the next w columns are the next row, from left #' to right and so on. #' @export massPix <- function( process = T, pca = T, slice = T, cluster.k = T, ionisation_mode = "positive", thres.int = 500000, thres.low = 200, thres.high = 1000, bin.ppm = 12, thres.filter = 11, ppm.annotate = 12, res.spatial = 50, PCnum = 5, row = 50, cluster.type = "kmeans", clusters = 6, ppm = 3, no_isotopes = 2, prop.1 = 0.9, prop.2 = 0.5, search.mod = T, mod = c(NL = T, label = F, oxidised = T, desat = T), lookup_mod, adducts, sel.class, fixed = F, fixed_FA, lookup_lipid_class, lookup_FA, lookup_element, files, #' spectra_dir, lib_dir, #' wl-07-11-2017, Tue: added imzMLparse, percentage.deiso = 3, steps = seq(0, 1, 0.05), imagedata.in = imagedata.in, norm.type = "TIC", standards = NULL, scale.type = "cs", transform = F, scale = 100, x.cood = x.cood, y.cood = y.cood, nlevels = 50, #' name = "", #' subname = "", rem.outliers = "only", summary = T, title = T, offset = 4) { #' setting up, reading library files #' to read and process very large files options(java.parameters = "Xmx4g") #' read in library files setwd(lib_dir) read <- read.csv("lib_FA.csv", sep = ",", header = T) lookup_FA <- read[, 2:4] row.names(lookup_FA) <- read[, 1] read <- read.csv("lib_class.csv", sep = ",", header = T) lookup_lipid_class <- read[, 2:3] row.names(lookup_lipid_class) <- read[, 1] read <- read.csv("lib_element.csv", sep = ",", header = T) lookup_element <- read[, 2:3] row.names(lookup_element) <- read[, 1] read <- read.csv("lib_modification.csv", sep = ",", header = T) lookup_mod <- read[, 2:ncol(read)] row.names(lookup_mod) <- read[, 1] #' parsing the data and getting x and y dimensions imzMLparse <- paste(home_dir, "imzMLConverter/imzMLConverter.jar", sep = "") # spectra files setwd(spectra_dir) files <- list.files(, recursive = TRUE, full.names = TRUE, pattern = ".imzML") setwd(spectra_dir) .jinit() .jaddClassPath(path = imzMLparse) imzML <- J("imzMLConverter.ImzMLHandler")$parseimzML(paste(getwd(), substr(files, 2, 100), sep = "")) x.cood <- J(imzML, "getWidth") y.cood <- J(imzML, "getHeight") #' make library if (process == T) { dbase <- makelibrary(ionisation_mode, sel.class, fixed, fixed_FA, lookup_lipid_class, lookup_FA, lookup_element) print(paste("library containing", ncol(dbase), "entries was made", sep = " ")) #' extract m/z and pick peaks setwd(spectra_dir) extracted <- mzextractor(files, spectra_dir, imzMLparse, thres.int, thres.low, thres.high) peaks <- peakpicker.bin(extracted, bin.ppm) # pick peaks #' perform deisotoping on a subset of the image temp.image <- subsetImage( extracted, peaks, percentage.deiso, thres.int, thres.low, thres.high, files, spectra_dir, imzMLparse ) temp.image.filtered <- filter( imagedata.in = temp.image, steps, thres.filter, offset = 1 ) deisotoped <- deisotope(ppm, no_isotopes, prop.1, prop.2, peaks = list("", temp.image.filtered[, 1]), image.sub = temp.image.filtered, search.mod, mod, lookup_mod ) #' perform annotation of lipids using library annotated <- annotate( ionisation_mode, deisotoped, adducts, ppm.annotate, dbase ) #' make full image and add lipid ids final.image <- contructImage( extracted, deisotoped, peaks, imzMLparse, spectra_dir, thres.int, thres.low, thres.high, files ) ids <- cbind(deisotoped[[2]][, 1], annotated, deisotoped[[2]][, 3:4]) #' create annotated image image.ann <- cbind(ids, final.image[, 2:ncol(final.image)]) #' normalise image image.norm <- normalise( imagedata.in = image.ann, norm.type, standards, offset = 4 ) #' wl-23-08-2017: save results for 'make library' write.csv(image.norm[, ], "image.norm.csv") write.csv(image.norm[, 1:3], "image.norm_short.csv") image.process <- image.norm } else { image.process <- read.csv("image.norm.csv") image.process <- image.process[, 2:ncol(image.process)] } #' perform PCA if requested if (pca == T) { image.scale <- centreScale( imagedata.in = image.process, scale.type, transform, offset = 4 ) imagedata.in <- image.scale imagePca( imagedata.in = image.scale, offset = 4, PCnum, scale, x.cood, y.cood, nlevels, res.spatial, summary, title, rem.outliers ) pca <- princomp(t(imagedata.in[, (offset + 1):ncol(imagedata.in)]), cor = FALSE, scores = TRUE, covmat = NULL ) labs.all <- as.numeric(as.vector(imagedata.in[, 1])) for (i in 1:PCnum) { loadings <- pca$loadings[, i] loadings <- cbind(loadings, labs.all) write.csv(loadings, paste("loadings_PC", i, ".csv")) } } #' make ion slice if requested if (slice == T) { imageSlice(row, imagedata.in = image.process, scale, x.cood, y.cood, nlevels, name = image.process[row, 1], subname = image.process[row, 2], offset = 4, res.spatial, rem.outliers, summary, title ) } #' perform clustering if requested if (cluster.k == T) { cluster(cluster.type, imagedata.in = image.process, offset, width = x.cood, res.spatial, height = y.cood, clusters ) } #' returns normalised data file, also printed as a csv if (process == T) { return(image.norm) } }