# HG changeset patch # User lain # Date 1649943053 0 # Node ID d12c213ea7c157a442bddeffab5eede970e015c9 # Parent 67733206be531fe8632a70b27ee772d651eba846 " master branch Updating" diff -r 67733206be53 -r d12c213ea7c1 moulinetteJF/MSpurity_postprocess.zip Binary file moulinetteJF/MSpurity_postprocess.zip has changed diff -r 67733206be53 -r d12c213ea7c1 moulinetteJF/acetamiprid-N-desmethyl_correl.pdf Binary file moulinetteJF/acetamiprid-N-desmethyl_correl.pdf has changed diff -r 67733206be53 -r d12c213ea7c1 moulinetteJF/retraitement_MSpurity_V4.R --- a/moulinetteJF/retraitement_MSpurity_V4.R Thu Apr 14 10:23:15 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,223 +0,0 @@ -## read and process mspurity W4M files -## create a summary of fragment for each precursor and a graphics of peseudo spectra + correlation on which checking of fragment is based on -## V3 try to identify and process multiple files for 1 precursor which may occur if different collision energy are used -## V4 elimination of correlation = NA. Correlation is done with precursor, if precursor is not present correlation with most intense peak - -##require(msPurity) - -########################################################################################### - -plot_pseudoSpectra <- function(x, fid, sumInt, vmz, corAbsInt, refcol, cNAME) { - ## function to compute sum of intensities among scans for all m/z kept (cor > r_threshold & minimum number of scans) - ## and plot pseudo spectra - ## x dataframe scan X fragments with scans number in the 1st column and ions in next with intensities - ## fid file id when several a precursor has been detected in several files - # du fait de la difference de nombre de colonne entre la dataframe qui inclue les scans en 1ere col, mzRef se decale de 1 - refcol <- refcol-1 - ## compute relative intensities max=100% - relInt <- sumInt[-1] - relInt <- relInt/max(relInt) - - ## define max value on vertical axis (need to increase in order to plot the label of fragments) - ymax <- max(relInt)+0.2*max(relInt) - - par(mfrow=c(2,1)) - plot(vmz, relInt, type="h", ylim=c(0,ymax), main = cNAME) - ## low correl coef. will be display in grey - corLow <- which(round(corAbsInt,2) < r_threshold) - - lbmzcor <- paste(as.character(vmz),"(r=",round(corAbsInt,2),")", sep="") - - if (length(corLow) > 0) { - text(vmz[corLow], relInt[corLow], lbmzcor[corLow], cex=0.5, col = "grey", srt = 90 , adj=0) - if (length(vmz) - length(corLow) > 1) text(vmz[-c(refcol,corLow)], relInt[-c(refcol,corLow)], lbmzcor[-c(refcol,corLow)], cex=0.6, col = 1, srt = 90 , adj=0) - } else { - if (length(vmz) > 1) text(vmz[-c(refcol)], relInt[-c(refcol)], lbmzcor[-c(refcol)], cex=0.6, col = 1, srt = 90 , adj=0) - } - - text(vmz[refcol], relInt[refcol], lbmzcor[refcol], cex=0.8, col = 2, srt = 90, adj=0 ) - - ## prepare result file - corValid <- (round(corAbsInt,2) >= r_threshold) - cpRes <- data.frame(rep(cNAME, length(vmz)), - rep(fid, length(vmz)), - vmz,corAbsInt,sumInt[-1],relInt,corValid) - - colnames(cpRes) <- c("compoundName","fileid","fragments_mz","CorWithPrecursor","AbsoluteIntensity","relativeIntensity","corValid") - return(cpRes) - -} - -## function for extraction of fragments corresponding to precursors detected by MSPurity -Xtract_fragments <- function(mzref, rtref, cNAME, r_threshold = 0.85, seuil_ra = 0.1, tolmz = 0.01, tolrt = 60) { - - ## filter precursor in the precursors file based on mz and rt in the compound list - cat("processing ",cNAME,"\n") - selprec <- which((abs(prec$precurMtchMZ - mzref) <= tolmz) & (abs(prec$precurMtchRT - rtref) <= tolrt)) - - ## check if there is the precursor in the file - if (length(selprec) > 0) { - - sprecini <- prec[selprec,] - - ## check if fragments corresponding to precursor are found in several files (collision energy) - ## this lead to a processing for each fileid - mf <- levels(as.factor(sprecini$fileid)) - nbf <- length(mf) - if (nbf > 1) cat(" several files detected for this compounds :\n") - - for (f in 1:nbf) { - - sprec <- sprecini[sprecini$fileid == mf[f],] - - ## selection of fragment in the fragments file with the grpid common in both fragments and precursors - selfrgt <- levels(as.factor(sprec$grpid)) - sfrgt <- frgt[frgt$grpid %in% selfrgt & frgt$fileid == mf[f],] - - ## filter fragments on relative intensity seuil_ra = user defined parameter (MSpurity flags could be used here) - sfrgtfil <- sfrgt[sfrgt$ra > seuil_ra,] - - mznominal <- round(x = sfrgtfil$mz, mzdecimal) - sfrgtfil <- data.frame(sfrgtfil, mznominal) - - ## creation of cross table row=scan col=mz X=ra - vmz <- levels(as.factor(sfrgtfil$mznominal)) - #fscan <- levels(as.factor(sfrgtfil$acquisitionNum)) - - cat(" fragments :",vmz) - # dsIntra <- matrix(NA,nrow = length(vscan), ncol = length(vmz)) - # rownames(dsIntra) <- fscan - # dsIntra <- data.frame(fscan,dsIntra) - # colnames(dsIntra) <- c("fscan",vmz) - - ## mz of precursor in data precursor to check correlation with - mzPrec <- paste("mz",round(mean(sprec$mz),mzdecimal),sep="") - - for (m in 1:length(vmz)) { - - ## absolute intensity - cln <- c(which(colnames(sfrgtfil) == "acquisitionNum"), which(colnames(sfrgtfil) == "i")) - int_mz <- sfrgtfil[sfrgtfil$mznominal == vmz[m], cln] - colnames(int_mz)[2] <- paste("mz", vmz[m], sep="") - - ## average intensities of mass in duplicate scans - compScans <- aggregate(x = int_mz, by = list(int_mz[[1]]), FUN = mean) - int_mz <- compScans[,-1] - - if (m == 1) { - dsAbsInt <- int_mz - #dsRelInt <- ra_mz - } else { - dsAbsInt <- merge(x = dsAbsInt, y = int_mz, by.x = 1, by.y = 1, all.x=TRUE, all.y=TRUE) - #dsRelInt <- merge(x = dsRelInt, y = ra_mz, by.x = 1, by.y = 1, all.x=TRUE, all.y=TRUE) - - } - } - ## for debug - ## write.table(x = dsAbsInt,file=paste(cNAME,"dsAbsInt.txt",sep=""), row.names = FALSE, sep="\t") - - ## elimination of mz with less than minNumberScan scans (user defined parameter) - xmz <- rep(NA,ncol(dsAbsInt)-1) - sumInt <- rep(NA,ncol(dsAbsInt)) - nbxmz <- 0 - NbScanCheck <- min(nrow(dsAbsInt),minNumberScan) - - for (j in 2:ncol(dsAbsInt)) { - sumInt[j] <- sum(dsAbsInt[j],na.rm = TRUE) - if (sum(!is.na(dsAbsInt[[j]])) < NbScanCheck) { - nbxmz <- nbxmz + 1 - xmz[nbxmz] <- j - } - } - - xmz <- xmz[-which(is.na(xmz))] - if (length(xmz) > 0) { - dsAbsInt <- dsAbsInt[,-c(xmz)] - sumInt <- sumInt[-c(xmz)] - ## liste des mz keeped decale de 1 avec dsAbsInt - vmz <- as.numeric(vmz[-c(xmz-1)]) - } - - ## reference ion for correlation computing = precursor OR maximum intensity ion in precursor is not present - refcol <- which(colnames(dsAbsInt) == mzPrec) - if (length(refcol) == 0) refcol <- which(sumInt == max(sumInt, na.rm = TRUE)) - - pdf(file=paste(cNAME,"_processing_file",mf[f],".pdf",sep=""), width = 8, height = 11 ); par(mfrow=c(3,2)) - - ## pearson correlations between absolute intensities computing - corAbsInt <- rep(NA,length(vmz)) - - if (length(refcol) > 0) { - for (i in 2:length(dsAbsInt)) { - corAbsInt[i-1] <- cor(x = dsAbsInt[[refcol]], y=dsAbsInt[[i]], use = "pairwise.complete.obs", method = "pearson") - plot(dsAbsInt[[refcol]],dsAbsInt[[i]], - xlab = colnames(dsAbsInt)[refcol], ylab=colnames(dsAbsInt)[i], - main=paste(cNAME," corr coeff r=",round(corAbsInt[i-1],2),sep="")) - } - ## plot pseudo spectra - resCompByfile <- plot_pseudoSpectra(x= dsAbsInt, fid = mf[f], sumInt = sumInt, vmz = vmz, corAbsInt = corAbsInt, refcol = refcol, cNAME = cNAME ) - if (f == 1) resComp <- resCompByfile - } else { - resCompByfile <- NULL - cat(" non detected in fragments file \n") - } - if (!is.null(resCompByfile)) resComp <- rbind(resComp,resCompByfile) - cat("\n") - dev.off() - } - } else { - resComp <- NULL - cat(" non detected in precursor file \n") - } - - return(resComp) -} - - -#################################### start ####################################################### - -## FOLDER AND FILES -setwd(dir = "C:/Users/jfmartin/Documents/PROJETS/PROG/tools_R/MSPurity/positif2/V2") - -#load("Galaxy541-[msPurity.filterFragSpectra_on_data_540__RData].rdata") - -## MSpurity precursors file -prec <- read.table(file = "msPurity.filterFragSpectra_on_data_60__peaklist_precursors.tsv", - header = TRUE, sep="\t") - -## MSpurity fragments file -frgt <- read.table(file = "msPurity.filterFragSpectra_on_data_60__peaklist_fragments.tsv", - header = TRUE, sep="\t") - -## list of compounds : col1=Name of molecule, col2=m/z, col3=retention time -compounds <- read.table(file = "compounds_pos.txt", sep="\t", header = TRUE) - -## PARAMETERS -## tolerance for mz(dalton) rt(seconds) to match the standard in the compounds list with the precursor MSpurity file -tolmz <- 0.01 -tolrt <- 20 - -## relative intensity threshold -seuil_ra = 0.05 -## nb decimal for mz -mzdecimal <- 0 -## r pearson correlation threshold between precursor and fragment absolute intensity -r_threshold <- 0.85 -## fragments are kept if there are found in a minimum number of scans -minNumberScan <- 8 - -for (i in 1:nrow(compounds)) { - ## loop execution for all compounds in the compounds file - resCor <- NULL - resCor <- Xtract_fragments(mzref = compounds[[2]][i], - rtref = compounds[[3]][i] * 60, - cNAME = compounds[[1]][i], - r_threshold, seuil_ra, tolmz, tolrt) - if (i == 1 & !is.null(resCor)) resAll <- resCor else if (!is.null(resCor)) resAll <- rbind(resAll,resCor) - - } - -write.table(x = resAll, file = "compound_fragments_result.txt", sep="\t", row.names = FALSE) - -######################################## end call ################################################ -