changeset 1:d12c213ea7c1 draft

" master branch Updating"
author lain
date Thu, 14 Apr 2022 13:30:53 +0000
parents 67733206be53
children e1c0f63872c4
files moulinetteJF/MSpurity_postprocess.zip moulinetteJF/acetamiprid-N-desmethyl_correl.pdf moulinetteJF/retraitement_MSpurity_V4.R
diffstat 3 files changed, 0 insertions(+), 223 deletions(-) [+]
line wrap: on
line diff
Binary file moulinetteJF/MSpurity_postprocess.zip has changed
Binary file moulinetteJF/acetamiprid-N-desmethyl_correl.pdf has changed
--- a/moulinetteJF/retraitement_MSpurity_V4.R	Thu Apr 14 10:23:15 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,223 +0,0 @@
-## read and process mspurity W4M files
-## create a summary of fragment for each precursor and a graphics of peseudo spectra + correlation on which checking of fragment is based on
-## V3 try to identify and process multiple files for 1 precursor which may occur if different collision energy are used
-## V4 elimination of correlation = NA. Correlation is done with precursor, if precursor is not present correlation with most intense peak
-
-##require(msPurity)
-
-###########################################################################################
-
-plot_pseudoSpectra <- function(x, fid, sumInt, vmz, corAbsInt, refcol, cNAME) {
-  ## function to compute sum of intensities among scans for all m/z kept (cor > r_threshold & minimum number of scans) 
-  ## and plot pseudo spectra
-  ## x dataframe scan X fragments with scans number in the 1st column and ions in next with intensities
-  ## fid file id when several a precursor has been detected in several files
-  # du fait de la difference de nombre de colonne entre la dataframe qui inclue les scans en 1ere col, mzRef se decale de 1
-  refcol <- refcol-1
-  ## compute relative intensities max=100%
-  relInt <- sumInt[-1]
-  relInt <- relInt/max(relInt)
-  
-  ## define max value on vertical axis (need to increase in order to plot the label of fragments)
-  ymax <- max(relInt)+0.2*max(relInt)
-  
-  par(mfrow=c(2,1))
-  plot(vmz, relInt, type="h", ylim=c(0,ymax), main = cNAME)
-  ## low correl coef. will be display in grey
-  corLow <- which(round(corAbsInt,2) < r_threshold)
-  
-  lbmzcor <- paste(as.character(vmz),"(r=",round(corAbsInt,2),")", sep="")
-  
-  if (length(corLow) > 0) {
-    text(vmz[corLow], relInt[corLow], lbmzcor[corLow], cex=0.5, col = "grey", srt = 90 , adj=0)
-    if (length(vmz) - length(corLow) > 1) text(vmz[-c(refcol,corLow)], relInt[-c(refcol,corLow)], lbmzcor[-c(refcol,corLow)], cex=0.6, col = 1, srt = 90 , adj=0)
-  } else {
-    if (length(vmz) > 1) text(vmz[-c(refcol)], relInt[-c(refcol)], lbmzcor[-c(refcol)], cex=0.6, col = 1, srt = 90 , adj=0)
-  }
-  
-  text(vmz[refcol], relInt[refcol], lbmzcor[refcol], cex=0.8, col = 2, srt = 90, adj=0 )
-  
-  ## prepare result file
-  corValid <- (round(corAbsInt,2) >= r_threshold)
-  cpRes <- data.frame(rep(cNAME, length(vmz)),
-                      rep(fid, length(vmz)),
-                      vmz,corAbsInt,sumInt[-1],relInt,corValid)
-  
-  colnames(cpRes) <- c("compoundName","fileid","fragments_mz","CorWithPrecursor","AbsoluteIntensity","relativeIntensity","corValid")
-  return(cpRes)
-  
-}
-
-## function for extraction of fragments corresponding to precursors detected by MSPurity
-Xtract_fragments <- function(mzref, rtref, cNAME, r_threshold = 0.85, seuil_ra = 0.1, tolmz = 0.01, tolrt = 60) {
-  
-  ## filter precursor in the precursors file based on mz and rt in the compound list
-  cat("processing ",cNAME,"\n")
-  selprec <- which((abs(prec$precurMtchMZ - mzref) <= tolmz) & (abs(prec$precurMtchRT - rtref) <= tolrt))
-  
-  ## check if there is the precursor in the file
-  if (length(selprec) > 0) {
-    
-    sprecini <- prec[selprec,]
-    
-    ## check if fragments corresponding to precursor are found in several files (collision energy)
-    ## this lead to a processing for each fileid 
-    mf <- levels(as.factor(sprecini$fileid))
-    nbf <- length(mf)
-    if (nbf > 1) cat(" several files detected for this compounds :\n")
-    
-    for (f in 1:nbf) {
-      
-      sprec <- sprecini[sprecini$fileid == mf[f],]
-      
-      ## selection of fragment in the fragments file with the grpid common in both fragments and precursors
-      selfrgt <- levels(as.factor(sprec$grpid))
-      sfrgt <- frgt[frgt$grpid %in% selfrgt & frgt$fileid == mf[f],]
-      
-      ## filter fragments on relative intensity seuil_ra = user defined parameter (MSpurity flags could be used here)
-      sfrgtfil <- sfrgt[sfrgt$ra > seuil_ra,]
-      
-      mznominal <- round(x = sfrgtfil$mz, mzdecimal)
-      sfrgtfil <- data.frame(sfrgtfil, mznominal)
-      
-      ## creation of cross table row=scan col=mz X=ra
-      vmz <- levels(as.factor(sfrgtfil$mznominal))
-      #fscan <- levels(as.factor(sfrgtfil$acquisitionNum))
-      
-      cat(" fragments :",vmz)
-      # dsIntra <- matrix(NA,nrow = length(vscan), ncol = length(vmz))
-      # rownames(dsIntra) <- fscan
-      # dsIntra <- data.frame(fscan,dsIntra)
-      # colnames(dsIntra) <- c("fscan",vmz)
-      
-      ## mz of precursor in data precursor to check correlation with
-      mzPrec <- paste("mz",round(mean(sprec$mz),mzdecimal),sep="")
-      
-      for (m in 1:length(vmz)) {
-        
-        ## absolute intensity
-        cln <- c(which(colnames(sfrgtfil) == "acquisitionNum"), which(colnames(sfrgtfil) == "i"))
-        int_mz <- sfrgtfil[sfrgtfil$mznominal == vmz[m], cln]
-        colnames(int_mz)[2] <- paste("mz", vmz[m], sep="")
-        
-        ## average intensities of mass in duplicate scans
-        compScans <- aggregate(x = int_mz, by = list(int_mz[[1]]), FUN = mean)
-        int_mz <- compScans[,-1]
-        
-        if (m == 1) {
-          dsAbsInt <- int_mz
-          #dsRelInt <- ra_mz
-        } else {
-          dsAbsInt <- merge(x = dsAbsInt, y = int_mz, by.x = 1, by.y = 1, all.x=TRUE, all.y=TRUE)
-          #dsRelInt <- merge(x = dsRelInt, y = ra_mz, by.x = 1, by.y = 1, all.x=TRUE, all.y=TRUE)
-          
-        }
-      }
-      ## for debug
-      ## write.table(x = dsAbsInt,file=paste(cNAME,"dsAbsInt.txt",sep=""), row.names = FALSE, sep="\t")
-      
-      ## elimination of mz with less than minNumberScan scans (user defined parameter)
-      xmz <- rep(NA,ncol(dsAbsInt)-1)
-      sumInt <- rep(NA,ncol(dsAbsInt))
-      nbxmz <- 0
-      NbScanCheck <- min(nrow(dsAbsInt),minNumberScan)
-      
-      for (j in 2:ncol(dsAbsInt)) {
-        sumInt[j] <- sum(dsAbsInt[j],na.rm = TRUE)
-        if (sum(!is.na(dsAbsInt[[j]])) < NbScanCheck) {
-          nbxmz <- nbxmz + 1
-          xmz[nbxmz] <- j
-        }
-      }
-      
-      xmz <- xmz[-which(is.na(xmz))]
-      if (length(xmz) > 0) {
-        dsAbsInt <- dsAbsInt[,-c(xmz)]
-        sumInt <- sumInt[-c(xmz)]
-        ## liste des mz keeped decale de 1 avec dsAbsInt
-        vmz <- as.numeric(vmz[-c(xmz-1)])
-      }
- 
-      ## reference ion for correlation computing = precursor OR maximum intensity ion in precursor is not present
-      refcol <- which(colnames(dsAbsInt) == mzPrec)
-      if (length(refcol) == 0) refcol <- which(sumInt == max(sumInt, na.rm = TRUE))
-      
-      pdf(file=paste(cNAME,"_processing_file",mf[f],".pdf",sep=""), width = 8, height = 11 ); par(mfrow=c(3,2))
-      
-      ## pearson correlations between absolute intensities computing
-      corAbsInt <- rep(NA,length(vmz))
-      
-      if (length(refcol) > 0) {
-        for (i in 2:length(dsAbsInt)) {
-          corAbsInt[i-1] <- cor(x = dsAbsInt[[refcol]], y=dsAbsInt[[i]], use = "pairwise.complete.obs", method = "pearson")
-          plot(dsAbsInt[[refcol]],dsAbsInt[[i]], 
-               xlab = colnames(dsAbsInt)[refcol], ylab=colnames(dsAbsInt)[i], 
-               main=paste(cNAME," corr coeff r=",round(corAbsInt[i-1],2),sep=""))
-        }
-        ## plot pseudo spectra
-        resCompByfile <- plot_pseudoSpectra(x= dsAbsInt, fid = mf[f], sumInt = sumInt, vmz = vmz, corAbsInt = corAbsInt, refcol = refcol, cNAME = cNAME )
-        if (f == 1) resComp <- resCompByfile
-      } else {
-        resCompByfile <- NULL
-        cat(" non detected in fragments file \n")
-      }
-      if (!is.null(resCompByfile)) resComp <- rbind(resComp,resCompByfile)
-      cat("\n")
-      dev.off()
-    }
-  } else {
-    resComp <- NULL
-    cat(" non detected in precursor file \n")
-  }
-  
-  return(resComp)
-}
-
-
-#################################### start  #######################################################
-
-## FOLDER AND FILES
-setwd(dir = "C:/Users/jfmartin/Documents/PROJETS/PROG/tools_R/MSPurity/positif2/V2")
-
-#load("Galaxy541-[msPurity.filterFragSpectra_on_data_540__RData].rdata")
-
-## MSpurity precursors file
-prec <- read.table(file = "msPurity.filterFragSpectra_on_data_60__peaklist_precursors.tsv",
-                   header = TRUE, sep="\t")
-
-## MSpurity fragments file
-frgt <- read.table(file = "msPurity.filterFragSpectra_on_data_60__peaklist_fragments.tsv",
-                   header = TRUE, sep="\t")
-
-## list of compounds : col1=Name of molecule, col2=m/z, col3=retention time
-compounds <- read.table(file = "compounds_pos.txt", sep="\t", header = TRUE)
-
-## PARAMETERS
-## tolerance for mz(dalton) rt(seconds) to match the standard in the compounds list with the precursor MSpurity file
-tolmz <- 0.01
-tolrt <- 20
-
-##  relative intensity threshold
-seuil_ra = 0.05
-## nb decimal for mz
-mzdecimal <- 0
-## r pearson correlation threshold between precursor and fragment absolute intensity
-r_threshold <- 0.85
-## fragments are kept if there are found in a minimum number of scans
-minNumberScan <- 8
-
-for (i in 1:nrow(compounds)) {
-  ## loop execution for all compounds in the compounds file
-  resCor <- NULL
-  resCor <- Xtract_fragments(mzref = compounds[[2]][i], 
-                             rtref = compounds[[3]][i] * 60,
-                             cNAME = compounds[[1]][i],
-                             r_threshold, seuil_ra, tolmz, tolrt)
-  if (i == 1 & !is.null(resCor)) resAll <- resCor else if (!is.null(resCor)) resAll <- rbind(resAll,resCor)
-
-  }
-
-write.table(x = resAll, file = "compound_fragments_result.txt", sep="\t", row.names = FALSE)
-
-########################################  end call ################################################
-