Mercurial > repos > marie-tremblay-metatoul > 2dnmrannotation
diff annotationRmn2DGlobale.R @ 3:546c7ccd2ed4 draft default tip
"planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 911f4beba3dcb25c1033e8239426f8f763683523"
author | workflow4metabolomics |
---|---|
date | Fri, 04 Feb 2022 09:01:11 +0000 |
parents | dff7bde22102 |
children |
line wrap: on
line diff
--- a/annotationRmn2DGlobale.R Tue Feb 04 10:59:26 2020 -0500 +++ b/annotationRmn2DGlobale.R Fri Feb 04 09:01:11 2022 +0000 @@ -1,28 +1,25 @@ -########################################################################################################################################### -# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE (OU PLUSIEURS) SEQUENCE(s) RMN # -# template : dataframe contenant la liste des couples de deplacements chimiques de la matrice complexe a annoter # -# cosy : 1 si sequence a utiliser / 0 sinon # -# hmbc : 1 si sequence a utiliser / 0 sinon # -# hsqc : 1 si sequence a utiliser / 0 sinon # -# jres : 1 si sequence a utiliser / 0 sinon # -# tocsy : 1 si sequence a utiliser / 0 sinon # -# tolPpm1 : tolerance autorisee autour de la valeur1 du couple de deplacements chimiques # -# tolPpm2HJRes : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si H dans dimension 2 # -# tolPpm2C : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si C dans dimension 2 # -# seuil : valeur du score de presence en deça de laquelle les metabolites annotes ne sont pas retenus # -# unicite : boolean pour ne retenir que les ... # -########################################################################################################################################### +################################################################################################### +# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE (OU PLUSIEURS) SEQUENCE(s) # +# template : dataframe contenant la liste des couples de deplacements chimiques de la matrice complexe a annoter # +# cosy : 1 si sequence a utiliser / 0 sinon # +# hmbc : 1 si sequence a utiliser / 0 sinon # +# hsqc : 1 si sequence a utiliser / 0 sinon # +# jres : 1 si sequence a utiliser / 0 sinon # +# tocsy : 1 si sequence a utiliser / 0 sinon # +# tolPpm1 : tolerance autorisee autour de la valeur1 du couple de deplacements chimiques # +# tolPpm2HJRes : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si H dans dimension 2 # +# tolPpm2C : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si C dans dimension 2 # +# seuil : valeur du score de presence en dela de laquelle les metabolites annotes ne sont pas retenus # +# unicite : boolean pour ne retenir que les ... # +################################################################################################### ## CALCUL MOYENNE SANS VALEUR(S) MANQUANTE(S) -mean.rmNa <- function(x) -{ - mean(x, na.rm=TRUE) +mean.rmNa <- function(x) { + mean(x, na.rm = TRUE) } -annotationRmn2DGlobale <- function(template, tolPpm1=0.01, tolPpm2HJRes=0.002, tolPpm2C=0.5, cosy=1, hmbc=1, hsqc=1, jres=1, tocsy=1, - seuil, unicite="NO") -{ +annotationRmn2DGlobale <- function(template, tolPpm1 = 0.01, tolPpm2HJRes = 0.002, tolPpm2C = 0.5, cosy = 1, hmbc = 1, hsqc = 1, jres = 1, tocsy = 1, seuil, unicite = "NO") { ## Initialisation - options (max.print=999999999) + options(max.print = 999999999) annotationCOSY <- data.frame() annotationHMBC <- data.frame() annotationHSQC <- data.frame() @@ -34,88 +31,117 @@ dataHSQC <- "NA" dataJRES <- "NA" dataTOCSY <- "NA" - + ## Application seuil seulement si annotation avec 1 seule sequence -## seuilPls2D <- 0 -## if ((sum(cosy, hmbc, hsqc, jres, tocsy)) == 1) -## seuilPls2D <- seuil seuilPls2D <- seuil - - if (cosy == 1) - { - matrice.cosy <- read.xlsx(template, sheet="COSY", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA") + + if (cosy == 1) { + matrice.cosy <- read.xlsx(template, sheet = "COSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") matrice.cosy <- matrice.cosy[matrice.cosy$peak.index != "x", ] - annotationCOSY <- annotationRmn2D(matrice.cosy, BdDReference_COSY, "COSY", ppm1Tol=tolPpm1, ppm2Tol=tolPpm1, seuil=seuilPls2D, - unicite=unicite) - dataCOSY <- data.frame(Metabolite=str_to_lower(annotationCOSY$liste_resultat$Metabolite), score.COSY=annotationCOSY$liste_resultat$score) + annotationCOSY <- annotationRmn2D(matrice.cosy, BdDReference_COSY, "COSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) + dataCOSY <- data.frame(Metabolite = str_to_lower(annotationCOSY$liste_resultat$Metabolite), score.COSY = annotationCOSY$liste_resultat$score) dataCOSY <- unique.data.frame(dataCOSY) } - - if (hmbc == 1) - { - matrice.hmbc <- read.xlsx(template, sheet="HMBC", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA") + + if (hmbc == 1) { + matrice.hmbc <- read.xlsx(template, sheet = "HMBC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") matrice.hmbc <- matrice.hmbc[matrice.hmbc$peak.index != "x", ] - annotationHMBC <- annotationRmn2D(matrice.hmbc, BdDReference_HMBC, "HMBC", ppm1Tol=tolPpm1, ppm2Tol=tolPpm2C, seuil=seuilPls2D, - unicite=unicite) - dataHMBC <- data.frame(Metabolite=str_to_lower(annotationHMBC$liste_resultat$Metabolite), score.HMBC=annotationHMBC$liste_resultat$score) + annotationHMBC <- annotationRmn2D(matrice.hmbc, BdDReference_HMBC, "HMBC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) + dataHMBC <- data.frame(Metabolite = str_to_lower(annotationHMBC$liste_resultat$Metabolite), score.HMBC = annotationHMBC$liste_resultat$score) dataHMBC <- unique.data.frame(dataHMBC) } - if (hsqc == 1) - { - matrice.hsqc <- read.xlsx(template, sheet="HSQC", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA") + if (hsqc == 1) { + matrice.hsqc <- read.xlsx(template, sheet = "HSQC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") matrice.hsqc <- matrice.hsqc[matrice.hsqc$peak.index != "x", ] - annotationHSQC <- annotationRmn2D(matrice.hsqc, BdDReference_HSQC, "HSQC", ppm1Tol=tolPpm1, ppm2Tol=tolPpm2C, seuil=seuilPls2D, - unicite=unicite) - dataHSQC <- data.frame(Metabolite=str_to_lower(annotationHSQC$liste_resultat$Metabolite), score.HSQC=annotationHSQC$liste_resultat$score) + annotationHSQC <- annotationRmn2D(matrice.hsqc, BdDReference_HSQC, "HSQC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) + dataHSQC <- data.frame(Metabolite = str_to_lower(annotationHSQC$liste_resultat$Metabolite), score.HSQC = annotationHSQC$liste_resultat$score) dataHSQC <- unique.data.frame(dataHSQC) } - - if (jres == 1) - { - matrice.jres <- read.xlsx(template, sheet="JRES", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA") + + if (jres == 1) { + matrice.jres <- read.xlsx(template, sheet = "JRES", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") matrice.jres <- matrice.jres[matrice.jres$peak.index != "x", ] - annotationJRES <- annotationRmn2D(matrice.jres, BdDReference_JRES, "JRES", ppm1Tol=tolPpm1, ppm2Tol=tolPpm2HJRes, seuil=seuilPls2D, - unicite=unicite) - dataJRES <- data.frame(Metabolite=str_to_lower(annotationJRES$liste_resultat$Metabolite), score.JRES=annotationJRES$liste_resultat$score) + annotationJRES <- annotationRmn2D(matrice.jres, BdDReference_JRES, "JRES", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2HJRes, seuil = seuilPls2D, unicite = unicite) + dataJRES <- data.frame(Metabolite = str_to_lower(annotationJRES$liste_resultat$Metabolite), score.JRES = annotationJRES$liste_resultat$score) dataJRES <- unique.data.frame(dataJRES) } - - if (tocsy == 1) - { - matrice.tocsy <- read.xlsx(template, sheet="TOCSY", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA") + + if (tocsy == 1) { + matrice.tocsy <- read.xlsx(template, sheet = "TOCSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") matrice.tocsy <- matrice.tocsy[matrice.tocsy$peak.index != "x", ] - annotationTOCSY <- annotationRmn2D(matrice.tocsy, BdDReference_TOCSY, "TOCSY", ppm1Tol=tolPpm1, ppm2Tol=tolPpm1, seuil=seuilPls2D, - unicite=unicite) - dataTOCSY <- data.frame(Metabolite=str_to_lower(annotationTOCSY$liste_resultat$Metabolite), score.TOCSY=annotationTOCSY$liste_resultat$score) + annotationTOCSY <- annotationRmn2D(matrice.tocsy, BdDReference_TOCSY, "TOCSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) + dataTOCSY <- data.frame(Metabolite = str_to_lower(annotationTOCSY$liste_resultat$Metabolite), score.TOCSY = annotationTOCSY$liste_resultat$score) dataTOCSY <- unique.data.frame(dataTOCSY) } - sequencesCombinationAverageScoreSeuil <- data.frame() - sequencesCombinationAverageScoreSeuilFiltre <- data.frame() - + seqCombiMeanScoreSeuil <- data.frame() + seqCombiMeanScoreSeuilFiltre <- data.frame() + ## CONCATENATION RESULTATS DIFFERENTES SEQUENCES data2D <- list(dataCOSY, dataHMBC, dataHSQC, dataJRES, dataTOCSY) whichSequenceNaN <- which((data2D != "NA")) data2D <- data2D[whichSequenceNaN] sequencesCombination <- data.frame(data2D[1]) - sequencesCombinationAverageScore <- sequencesCombination - + seqCombiMeanScore <- sequencesCombination + ## Si une seule sequence et seuil sur score = filtre applique dans la fonction annotationRmn2D - if (length(data2D) >= 2) - { + if (length(data2D) >= 2) { ## CONCATENATION SCORE PAR SEQUENCE for (l in 2:length(data2D)) - sequencesCombination <- merge.data.frame(sequencesCombination, data2D[l], by="Metabolite", all.x=TRUE, all.y=TRUE) - - ## SCORE MOYEN (sans prise en compte valeurs manquantes) - meanScore <- apply(sequencesCombination[, -1], 1, FUN=mean.rmNa) - sequencesCombinationAverageScore <- cbind.data.frame(sequencesCombination, averageScore=meanScore) - ## SUPPRESSION METABOLITE AVEC SCORE MOYEN < SEUIL -## sequencesCombinationAverageScoreSeuilFiltre <- filter(sequencesCombinationAverageScore, averageScore >= seuil) - sequencesCombinationAverageScoreSeuilFiltre <- sequencesCombinationAverageScore[sequencesCombinationAverageScore$averageScore > seuil, ] + sequencesCombination <- merge.data.frame(sequencesCombination, data2D[l], by = "Metabolite", all.x = TRUE, all.y = TRUE) + + ## Replacement of NA values due to mis annotation + for (m in seq_len(nrow(sequencesCombination))) { + COSYcompound <- sort(names(BdDReference_COSY)) + HMBCcompound <- sort(names(BdDReference_HMBC)) + HSQCcompound <- sort(names(BdDReference_HSQC)) + JREScompound <- sort(names(BdDReference_JRES)) + TOCSYcompound <- sort(names(BdDReference_TOCSY)) + + if (is.na(sequencesCombination[m, 2])) { + compound <- as.character(sequencesCombination[m, 1]) + for (c in seq_len(length(COSYcompound))) + if (str_to_lower(compound) == str_to_lower(COSYcompound[c])) + sequencesCombination[m, 2] <- 0 + } + + if (is.na(sequencesCombination[m, 3])) { + compound <- as.character(sequencesCombination[m, 1]) + for (c in seq_len(length(HMBCcompound))) + if (str_to_lower(compound) == str_to_lower(HMBCcompound[c])) + sequencesCombination[m, 3] <- 0 + } + + if (is.na(sequencesCombination[m, 4])) { + compound <- as.character(sequencesCombination[m, 1]) + for (c in seq_len(length(HSQCcompound))) + if (str_to_lower(compound) == str_to_lower(HSQCcompound[c])) + sequencesCombination[m, 4] <- 0 + } + + if (is.na(sequencesCombination[m, 5])) { + compound <- as.character(sequencesCombination[m, 1]) + for (c in seq_len(length(JREScompound))) + if (str_to_lower(compound) == str_to_lower(JREScompound[c])) + sequencesCombination[m, 5] <- 0 + } + + if (is.na(sequencesCombination[m, 6])) { + compound <- as.character(sequencesCombination[m, 1]) + for (c in seq_len(length(TOCSYcompound))) + if (str_to_lower(compound) == str_to_lower(TOCSYcompound[c])) + sequencesCombination[m, 6] <- 0 + } } - return(list(COSY=annotationCOSY, HMBC=annotationHMBC, HSQC=annotationHSQC, JRES=annotationJRES, TOCSY=annotationTOCSY, - combination=sequencesCombinationAverageScoreSeuilFiltre)) + ## SCORE MOYEN (sans prise en compte valeurs manquantes) + meanScore <- round(apply(sequencesCombination[, -1], 1, FUN = mean.rmNa), 2) + seqCombiMeanScore <- cbind.data.frame(sequencesCombination, averageScore = meanScore) + + ## SUPPRESSION METABOLITE AVEC SCORE MOYEN < SEUIL + seqCombiMeanScoreSeuilFiltre <- seqCombiMeanScore[seqCombiMeanScore$averageScore > seuil, ] + } + + return(list(COSY = annotationCOSY, HMBC = annotationHMBC, HSQC = annotationHSQC, JRES = annotationJRES, TOCSY = annotationTOCSY, combination = seqCombiMeanScoreSeuilFiltre)) }