2dnmrannotation: annotationRmn2DGlobale.R comparison

comparison annotationRmn2DGlobale.R @ 3:546c7ccd2ed4 draft default tip

"planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 911f4beba3dcb25c1033e8239426f8f763683523"

author	workflow4metabolomics
date	Fri, 04 Feb 2022 09:01:11 +0000
parents	dff7bde22102
children

comparison

equal deleted inserted replaced

-:dff7bde22102
+:546c7ccd2ed4
-###########################################################################################################################################
+###################################################################################################
-# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE (OU PLUSIEURS) SEQUENCE(s) RMN                                                     #
+# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE (OU PLUSIEURS) SEQUENCE(s)                 #
-# template : dataframe contenant la liste des couples de deplacements chimiques de la matrice complexe a annoter                          #
+# template : dataframe contenant la liste des couples de deplacements chimiques de la matrice complexe a annoter									                          #
-# cosy : 1 si sequence a utiliser / 0 sinon                                                                                               #
+# cosy : 1 si sequence a utiliser / 0 sinon                                                       #
-# hmbc : 1 si sequence a utiliser / 0 sinon                                                                                               #
+# hmbc : 1 si sequence a utiliser / 0 sinon                                                       #
-# hsqc : 1 si sequence a utiliser / 0 sinon                                                                                               #
+# hsqc : 1 si sequence a utiliser / 0  sinon                                                      #
-# jres : 1 si sequence a utiliser / 0 sinon                                                                                               #
+# jres : 1 si sequence a utiliser / 0 sinon                                                       #
-# tocsy : 1 si sequence a utiliser / 0 sinon                                                                                              #
+# tocsy : 1 si sequence a utiliser / 0 sinon                                                      #
-# tolPpm1 : tolerance autorisee autour de la valeur1 du couple de deplacements chimiques                                                  #
+# tolPpm1 : tolerance autorisee autour de la valeur1 du couple de deplacements chimiques          #
-# tolPpm2HJRes : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si H dans dimension 2                       #
+# tolPpm2HJRes : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si H dans dimension 2                       								          #
-# tolPpm2C : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si C dans dimension 2                           #
+# tolPpm2C : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si C dans dimension 2                           									  #
-# seuil : valeur du score de presence en deça de laquelle les metabolites annotes ne sont pas retenus                                     #
+# seuil : valeur du score de presence en dela de laquelle les metabolites annotes ne sont pas retenus #
-# unicite : boolean pour ne retenir que les ...                                                                                           #
+# unicite : boolean pour ne retenir que les ...                                                   #
-###########################################################################################################################################
+###################################################################################################
 ## CALCUL MOYENNE SANS VALEUR(S) MANQUANTE(S)
-mean.rmNa <- function(x)
+mean.rmNa <- function(x) {
-{
+mean(x, na.rm = TRUE)
-mean(x, na.rm=TRUE)
 }
-annotationRmn2DGlobale <- function(template, tolPpm1=0.01, tolPpm2HJRes=0.002, tolPpm2C=0.5, cosy=1, hmbc=1, hsqc=1, jres=1, tocsy=1,
+annotationRmn2DGlobale <- function(template, tolPpm1 = 0.01, tolPpm2HJRes = 0.002, tolPpm2C = 0.5, cosy = 1, hmbc = 1, hsqc = 1, jres = 1, tocsy = 1, seuil, unicite = "NO") {
-seuil, unicite="NO")
-{
 ## Initialisation
-options (max.print=999999999)
+options(max.print = 999999999)
 annotationCOSY <- data.frame()
 annotationHMBC <- data.frame()
 annotationHSQC <- data.frame()
 annotationJRES <- data.frame()
 annotationTOCSY <- data.frame()
 dataCOSY <- "NA"
 dataHMBC <- "NA"
 dataHSQC <- "NA"
 dataJRES <- "NA"
 dataTOCSY <- "NA"
 ## Application seuil seulement si annotation avec 1 seule sequence
-##   seuilPls2D <- 0
-##   if ((sum(cosy, hmbc, hsqc, jres, tocsy)) == 1)
-##     seuilPls2D <- seuil
 seuilPls2D <- seuil
-if (cosy == 1)
+if (cosy == 1) {
-{
+matrice.cosy <- read.xlsx(template, sheet = "COSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA")
-matrice.cosy <- read.xlsx(template, sheet="COSY", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA")
 matrice.cosy <- matrice.cosy[matrice.cosy$peak.index != "x", ]
-annotationCOSY <- annotationRmn2D(matrice.cosy, BdDReference_COSY, "COSY", ppm1Tol=tolPpm1, ppm2Tol=tolPpm1, seuil=seuilPls2D,
+annotationCOSY <- annotationRmn2D(matrice.cosy, BdDReference_COSY, "COSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite)
-unicite=unicite)
+dataCOSY <- data.frame(Metabolite = str_to_lower(annotationCOSY$liste_resultat$Metabolite), score.COSY = annotationCOSY$liste_resultat$score)
-dataCOSY <- data.frame(Metabolite=str_to_lower(annotationCOSY$liste_resultat$Metabolite), score.COSY=annotationCOSY$liste_resultat$score)
 dataCOSY <- unique.data.frame(dataCOSY)
 }
-if (hmbc == 1)
+if (hmbc == 1) {
-{
+matrice.hmbc <- read.xlsx(template, sheet = "HMBC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA")
-matrice.hmbc <- read.xlsx(template, sheet="HMBC", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA")
 matrice.hmbc <- matrice.hmbc[matrice.hmbc$peak.index != "x", ]
-annotationHMBC <- annotationRmn2D(matrice.hmbc, BdDReference_HMBC, "HMBC", ppm1Tol=tolPpm1, ppm2Tol=tolPpm2C, seuil=seuilPls2D,
+annotationHMBC <- annotationRmn2D(matrice.hmbc, BdDReference_HMBC, "HMBC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite)
-unicite=unicite)
+dataHMBC <- data.frame(Metabolite = str_to_lower(annotationHMBC$liste_resultat$Metabolite), score.HMBC = annotationHMBC$liste_resultat$score)
-dataHMBC <- data.frame(Metabolite=str_to_lower(annotationHMBC$liste_resultat$Metabolite), score.HMBC=annotationHMBC$liste_resultat$score)
 dataHMBC <- unique.data.frame(dataHMBC)
 }
-if (hsqc == 1)
+if (hsqc == 1) {
-{
+matrice.hsqc <- read.xlsx(template, sheet = "HSQC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA")
-matrice.hsqc <- read.xlsx(template, sheet="HSQC", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA")
 matrice.hsqc <- matrice.hsqc[matrice.hsqc$peak.index != "x", ]
-annotationHSQC <- annotationRmn2D(matrice.hsqc, BdDReference_HSQC, "HSQC", ppm1Tol=tolPpm1, ppm2Tol=tolPpm2C, seuil=seuilPls2D,
+annotationHSQC <- annotationRmn2D(matrice.hsqc, BdDReference_HSQC, "HSQC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite)
-unicite=unicite)
+dataHSQC <- data.frame(Metabolite = str_to_lower(annotationHSQC$liste_resultat$Metabolite), score.HSQC = annotationHSQC$liste_resultat$score)
-dataHSQC <- data.frame(Metabolite=str_to_lower(annotationHSQC$liste_resultat$Metabolite), score.HSQC=annotationHSQC$liste_resultat$score)
 dataHSQC <- unique.data.frame(dataHSQC)
 }
-if (jres == 1)
+if (jres == 1) {
-{
+matrice.jres <- read.xlsx(template, sheet = "JRES", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA")
-matrice.jres <- read.xlsx(template, sheet="JRES", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA")
 matrice.jres <- matrice.jres[matrice.jres$peak.index != "x", ]
-annotationJRES <- annotationRmn2D(matrice.jres, BdDReference_JRES, "JRES", ppm1Tol=tolPpm1, ppm2Tol=tolPpm2HJRes, seuil=seuilPls2D,
+annotationJRES <- annotationRmn2D(matrice.jres, BdDReference_JRES, "JRES", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2HJRes, seuil = seuilPls2D, unicite = unicite)
-unicite=unicite)
+dataJRES <- data.frame(Metabolite = str_to_lower(annotationJRES$liste_resultat$Metabolite), score.JRES = annotationJRES$liste_resultat$score)
-dataJRES <- data.frame(Metabolite=str_to_lower(annotationJRES$liste_resultat$Metabolite), score.JRES=annotationJRES$liste_resultat$score)
 dataJRES <- unique.data.frame(dataJRES)
 }
-if (tocsy == 1)
+if (tocsy == 1) {
-{
+matrice.tocsy <- read.xlsx(template, sheet = "TOCSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA")
-matrice.tocsy <- read.xlsx(template, sheet="TOCSY", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA")
 matrice.tocsy <- matrice.tocsy[matrice.tocsy$peak.index != "x", ]
-annotationTOCSY <- annotationRmn2D(matrice.tocsy, BdDReference_TOCSY, "TOCSY", ppm1Tol=tolPpm1, ppm2Tol=tolPpm1, seuil=seuilPls2D,
+annotationTOCSY <- annotationRmn2D(matrice.tocsy, BdDReference_TOCSY, "TOCSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite)
-unicite=unicite)
+dataTOCSY <- data.frame(Metabolite = str_to_lower(annotationTOCSY$liste_resultat$Metabolite), score.TOCSY = annotationTOCSY$liste_resultat$score)
-dataTOCSY <- data.frame(Metabolite=str_to_lower(annotationTOCSY$liste_resultat$Metabolite), score.TOCSY=annotationTOCSY$liste_resultat$score)
 dataTOCSY <- unique.data.frame(dataTOCSY)
 }
-sequencesCombinationAverageScoreSeuil <- data.frame()
+seqCombiMeanScoreSeuil <- data.frame()
-sequencesCombinationAverageScoreSeuilFiltre <- data.frame()
+seqCombiMeanScoreSeuilFiltre <- data.frame()
 ## CONCATENATION RESULTATS DIFFERENTES SEQUENCES
 data2D <- list(dataCOSY, dataHMBC, dataHSQC, dataJRES, dataTOCSY)
 whichSequenceNaN <- which((data2D != "NA"))
 data2D <- data2D[whichSequenceNaN]
 sequencesCombination <- data.frame(data2D[1])
-sequencesCombinationAverageScore <- sequencesCombination
+seqCombiMeanScore <- sequencesCombination
 ## Si une seule sequence et seuil sur score = filtre applique dans la fonction annotationRmn2D
-if (length(data2D) >= 2)
+if (length(data2D) >= 2) {
-{
 ## CONCATENATION SCORE PAR SEQUENCE
 for (l in 2:length(data2D))
-sequencesCombination <- merge.data.frame(sequencesCombination, data2D[l], by="Metabolite", all.x=TRUE, all.y=TRUE)
+sequencesCombination <- merge.data.frame(sequencesCombination, data2D[l], by = "Metabolite", all.x = TRUE, all.y = TRUE)
-## SCORE MOYEN (sans prise en compte valeurs manquantes)
+## Replacement of NA values due to mis annotation
-meanScore <- apply(sequencesCombination[, -1], 1, FUN=mean.rmNa)
+for (m in seq_len(nrow(sequencesCombination))) {
-sequencesCombinationAverageScore <- cbind.data.frame(sequencesCombination, averageScore=meanScore)
+COSYcompound <- sort(names(BdDReference_COSY))
-## SUPPRESSION METABOLITE AVEC SCORE MOYEN < SEUIL
+HMBCcompound <- sort(names(BdDReference_HMBC))
-##    sequencesCombinationAverageScoreSeuilFiltre <- filter(sequencesCombinationAverageScore, averageScore >= seuil)
+HSQCcompound <- sort(names(BdDReference_HSQC))
-sequencesCombinationAverageScoreSeuilFiltre <- sequencesCombinationAverageScore[sequencesCombinationAverageScore$averageScore > seuil, ]
+JREScompound <- sort(names(BdDReference_JRES))
+TOCSYcompound <- sort(names(BdDReference_TOCSY))
+if (is.na(sequencesCombination[m, 2])) {
+compound <- as.character(sequencesCombination[m, 1])
+for (c in seq_len(length(COSYcompound)))
+if (str_to_lower(compound) == str_to_lower(COSYcompound[c]))
+sequencesCombination[m, 2] <- 0
+}
+if (is.na(sequencesCombination[m, 3])) {
+compound <- as.character(sequencesCombination[m, 1])
+for (c in seq_len(length(HMBCcompound)))
+if (str_to_lower(compound) == str_to_lower(HMBCcompound[c]))
+sequencesCombination[m, 3] <- 0
+}
+if (is.na(sequencesCombination[m, 4])) {
+compound <- as.character(sequencesCombination[m, 1])
+for (c in seq_len(length(HSQCcompound)))
+if (str_to_lower(compound) == str_to_lower(HSQCcompound[c]))
+sequencesCombination[m, 4] <- 0
+}
+if (is.na(sequencesCombination[m, 5])) {
+compound <- as.character(sequencesCombination[m, 1])
+for (c in seq_len(length(JREScompound)))
+if (str_to_lower(compound) == str_to_lower(JREScompound[c]))
+sequencesCombination[m, 5] <- 0
+}
+if (is.na(sequencesCombination[m, 6])) {
+compound <- as.character(sequencesCombination[m, 1])
+for (c in seq_len(length(TOCSYcompound)))
+if (str_to_lower(compound) == str_to_lower(TOCSYcompound[c]))
+sequencesCombination[m, 6] <- 0
+}
 }
-return(list(COSY=annotationCOSY, HMBC=annotationHMBC, HSQC=annotationHSQC, JRES=annotationJRES, TOCSY=annotationTOCSY,
+## SCORE MOYEN (sans prise en compte valeurs manquantes)
-combination=sequencesCombinationAverageScoreSeuilFiltre))
+meanScore <- round(apply(sequencesCombination[, -1], 1, FUN = mean.rmNa), 2)
+seqCombiMeanScore <- cbind.data.frame(sequencesCombination, averageScore = meanScore)
+## SUPPRESSION METABOLITE AVEC SCORE MOYEN < SEUIL
+seqCombiMeanScoreSeuilFiltre <- seqCombiMeanScore[seqCombiMeanScore$averageScore > seuil, ]
+}
+return(list(COSY = annotationCOSY, HMBC = annotationHMBC, HSQC = annotationHSQC, JRES = annotationJRES, TOCSY = annotationTOCSY, combination = seqCombiMeanScoreSeuilFiltre))
 }

Mercurial > repos > marie-tremblay-metatoul > 2dnmrannotation

comparison annotationRmn2DGlobale.R @ 3:546c7ccd2ed4 draft default tip