diff nmr_annotation2d/annotationRmn2D.R @ 0:8035235e46c7 draft

Uploaded
author marie-tremblay-metatoul
date Mon, 23 Dec 2019 09:26:20 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nmr_annotation2d/annotationRmn2D.R	Mon Dec 23 09:26:20 2019 -0500
@@ -0,0 +1,246 @@
+###########################################################################################################################################
+# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE SEQUENCE RMN                                                                       #
+# matriceComplexe : data.frame liste couples ppm de la matrice a annoter                                                                  #
+# BdDStandards : objet contenant la base de donnees des composes standards                                                                #
+# nom_séquence : nom sequence 2D a utiliser pour annotation ("JRES","COSY","TOCSY","HMBC","HSQC")                                         #
+# ppm1Tol : tolerance ppm axe abscisses                                                                                                   #
+# ppm2Tol : tolerance ppm axe ordonnees                                                                                                   #
+# nb_ligne_template : préciser le nombre total de ligne de la feuille de calcul à annoter                                                 #
+###########################################################################################################################################
+annotationRmn2D <- function(matriceComplexe, BdDStandards, nom_sequence, ppm1Tol=0.01, ppm2Tol=0.01, 
+                            seuil=0, unicite="NO")
+{
+  ## Longueur de la peak-list de la matrice a annoter
+  PeakListLength <- length(matriceComplexe[, 1])
+
+  ## Nombre de metabolites inclus dans BdD de composes standards
+  nbMetabolitesBdD <- length(BdDStandards)
+  matrixAnnotation <- data.frame()
+  allMetabolitesList <- data.frame()
+  seuil_score <- seuil
+  
+  ## Boucle sur les metabolites inclus dans BdD
+  for (i in 1:nbMetabolitesBdD)
+  {
+    ## Infos metabolite en cours
+    iMetabolite <- BdDStandards[[i]]
+    ppm1M <- iMetabolite[,1] 
+    ppm2M <- iMetabolite[,2]
+    nbPeakMetabolite <- length(ppm1M)
+    MetaboliteName <- names(BdDStandards[i])
+##    print(MetaboliteName)
+    ## Initialisation
+    k <- 0
+    presenceScore <- 0
+    annotatedPpmRef <- data.frame()
+    annotatedPpmList <- data.frame()
+    annotatedPeakLength <- 0
+    metabolites <- data.frame()
+    metabolitesList <- data.frame()
+  
+    ## Boucle sur les couples de pics de la matrice a annoter
+    for (p in 1:PeakListLength)
+    { 
+      ppmAnnotationF1 <- as.numeric(matriceComplexe[p, 3])
+      ppmAnnotationF2 <- as.numeric(matriceComplexe[p, 2])
+      e <- simpleMessage("end of file")
+      tryCatch({
+        if (!is.na(ppmAnnotationF1))
+        {
+          matrixAnnotation <- unique.data.frame(rbind.data.frame(matrixAnnotation, matriceComplexe[p, ]))
+        }
+        # Recherche du couple de pics de la matrice la liste des couples du metabolite standard
+        metaboliteIn <- (ppm1M >= (ppmAnnotationF2-ppm1Tol) & ppm1M <= (ppmAnnotationF2+ppm1Tol) & 
+                     ppm2M >= (ppmAnnotationF1-ppm2Tol) & ppm2M <= (ppmAnnotationF1+ppm2Tol))
+        WhichMetaboliteIn <- which(metaboliteIn)
+        # Si au moins un couple de la matrice a annoter dans liste couples metabolite standard
+        if (length(WhichMetaboliteIn) > 0)
+        {
+          for (a in 1:length(WhichMetaboliteIn))
+          {
+            annotatedPpmList <- data.frame(ppm1=ppm1M[WhichMetaboliteIn[a]], ppm2=ppm2M[WhichMetaboliteIn[a]], theoricalLength=nbPeakMetabolite)
+            annotatedPpmRef <- rbind(annotatedPpmRef,annotatedPpmList)
+          }
+        }
+      }, error=function(e){cat ("End of file \n");})
+    }
+
+    # Au - 1 couple de ppm de la matrice complexe annote
+    if (nrow(annotatedPpmRef) >= 1)
+    {
+      ## Nombre couples annotes
+      annotatedPeakLength <- nrow(annotatedPpmRef)
+      
+      ## Recherche doublons
+      annotatedDoublons <- duplicated(annotatedPpmRef)
+      if (sum(duplicated(annotatedPpmRef)) > 0)
+      {
+        annotatedPeakLength <- nrow(annotatedPpmRef) - sum(duplicated(annotatedPpmRef))
+        annotatedPpmRef <- annotatedPpmRef[-duplicated(annotatedPpmRef), ]
+      }
+      presenceScore <- annotatedPeakLength/nbPeakMetabolite
+    }
+    
+    ## Conservation metabolites dont score > seuil
+    if (presenceScore > seuil_score)
+    {
+      metabolites <- data.frame(Metabolite=MetaboliteName, score=presenceScore)
+      metabolitesList <- cbind.data.frame(annotatedPpmRef, metabolites) 
+      allMetabolitesList <- rbind.data.frame(allMetabolitesList, metabolitesList)
+    }
+  }
+  
+  # Initialisation
+  commonPpm <- data.frame()
+  commonPpmList <- data.frame()
+  metaboliteAdd <- data.frame()
+  metaboliteAddList <- data.frame()
+#  metabolite_ref <- data.frame()
+  commonMetabolitesList <- data.frame()
+  commonMetabolitesPpmList <- data.frame()
+  commonMetabolitesPpmAllList1 <- data.frame()
+  commonMetabolitesPpmAllList <- data.frame()
+  listeTotale_2D_unicite <- allMetabolitesList[, 1:4]
+  allMetabolitesList <- allMetabolitesList[, -3]
+  metabolitesAllUnicite <- data.frame()
+  
+  ## Boucle sur tous couples annotes
+  for (j in 1:length(allMetabolitesList$ppm1))
+  {
+    ## Boucle sur metabolites dans BdD composes standards
+    for (i in 1:nbMetabolitesBdD)
+    {
+      ppmMetaboliteBdD <- BdDStandards[[i]]
+      ppm1M <- ppmMetaboliteBdD[,1] 
+      ppm2M <- ppmMetaboliteBdD[,2]
+      # Nombre de couples metabolite
+      nbPeakMetabolite <- length(ppm1M)
+      MetaboliteName <- names(BdDStandards[i])
+
+      metabolitesInAll <- (ppm1M >= (allMetabolitesList[j,1]-ppm1Tol) & ppm1M <= (allMetabolitesList[j,1]+ppm1Tol) & 
+                            ppm2M >= (allMetabolitesList[j,2]-ppm2Tol) & ppm2M <= (allMetabolitesList[j,2]+ppm2Tol))
+      WhichMetabolitesInAll <- which(metabolitesInAll)
+
+      if (MetaboliteName != allMetabolitesList[j, 3] & length(WhichMetabolitesInAll) > 0)
+      {
+        metabolitesAllUnicite <- rbind.data.frame(metabolitesAllUnicite, listeTotale_2D_unicite[j,])
+        commonPpm <- data.frame(ppm1=allMetabolitesList[j,1], ppm2=allMetabolitesList[j,2])
+        commonPpmList <- rbind.data.frame(commonPpmList, commonPpm)
+        commonPpmList <- unique(commonPpmList)
+        metaboliteAdd <- data.frame(nom_metabolite=MetaboliteName)
+        metaboliteAddList <- rbind.data.frame(metaboliteAddList, metaboliteAdd)
+#        metabolite_ref <- data.frame(nom_metabolite=allMetabolitesList[j,3])
+        commonMetabolitesList <- rbind.data.frame(data.frame(nom_metabolite=allMetabolitesList[j, 3]), metaboliteAddList)
+        commonMetabolitesPpmList <- cbind.data.frame(commonPpm, commonMetabolitesList)
+        commonMetabolitesPpmAllList1 <- rbind.data.frame(commonMetabolitesPpmAllList1, commonMetabolitesPpmList)
+        commonMetabolitesPpmAllList1 <- unique.data.frame(commonMetabolitesPpmAllList1)
+      }
+    }
+    commonMetabolitesPpmAllList <- rbind.data.frame(commonMetabolitesPpmAllList, commonMetabolitesPpmAllList1)
+    commonMetabolitesPpmAllList <- unique.data.frame(commonMetabolitesPpmAllList)
+    
+    #initialisation des data.frame
+    commonPpm <- data.frame()
+    metaboliteAdd <- data.frame()
+    metaboliteAddList <- data.frame()
+    metabolite_ref <- data.frame()
+    commonMetabolitesList <- data.frame()
+    commonMetabolitesPpmList <- data.frame()
+    commonMetabolitesPpmAllList1 <- data.frame()
+  }
+
+  unicityAllList <- listeTotale_2D_unicite
+  if (nrow(listeTotale_2D_unicite)!=0 & nrow(metabolitesAllUnicite)!=0)
+    unicityAllList <- setdiff(listeTotale_2D_unicite, metabolitesAllUnicite)
+
+  unicitynbCouplesRectif <- data.frame()
+  for (g in 1:nrow(unicityAllList))
+  {
+    metaboliteUnicity <- (unicityAllList$Metabolite == unicityAllList$Metabolite[g])
+    WhichMetaboliteUnicity <- which(metaboliteUnicity)
+    nb_occurence <- length(WhichMetaboliteUnicity)
+    unicitynbCouplesRectif <- rbind.data.frame(unicitynbCouplesRectif, nb_occurence)
+  }
+  names(unicitynbCouplesRectif) <- "NbCouplesAnnotes"
+  unicityAllList <- cbind.data.frame(unicityAllList, unicitynbCouplesRectif)
+  
+  unicityAllList <- cbind.data.frame(unicityAllList, score_unicite=unicityAllList$NbCouplesAnnotes/unicityAllList$theoricalLength)
+  unicityAllList <- unicityAllList[, -3]
+  unicityAllList <- unicityAllList[, -4]
+
+##  unicityAllList <- filter(unicityAllList, unicityAllList$score_unicite > seuil_score)
+  unicityAllList <- unicityAllList[unicityAllList$score_unicite > seuil_score,]
+
+  listeTotale_metabo <- data.frame()
+  if (nrow(commonPpmList) !=0)
+  {
+    for (o in 1:length(commonPpmList[, 1]))
+    {
+      tf6 <- (commonMetabolitesPpmAllList$ppm1 == commonPpmList[o,1] & commonMetabolitesPpmAllList$ppm2 == commonPpmList[o,2])
+      w6 <- which(tf6) 
+      
+      for (s in 1:length(w6))
+      {
+        metaboliteAdd <- data.frame(nom_metabolite=commonMetabolitesPpmAllList[w6[s],3])
+        commonMetabolitesList <- paste(commonMetabolitesList, metaboliteAdd[1,], sep = " ")
+      }
+      liste_metabo_ppm <- cbind.data.frame(ppm1=commonPpmList[o,1],ppm2=commonPpmList[o,2], commonMetabolitesList)
+      listeTotale_metabo <- rbind.data.frame(listeTotale_metabo, liste_metabo_ppm)
+      commonMetabolitesList <- data.frame()
+    }
+  }
+
+  # Representation graphique
+  if (nom_sequence == "HSQC" | nom_sequence == "HMBC")
+  {
+    atome <- "13C"
+    indice_positif <- 1
+    indice_negatif <- -10
+  }else{
+    atome <- "1H"
+    indice_positif <- 0.5
+    indice_negatif <- -0.5
+  }
+  
+  matriceComplexe <- matrixAnnotation
+  ppm1 <- as.numeric(matriceComplexe[,2])
+  ppm2 <- as.numeric(matriceComplexe[,3])
+  
+  if (unicite == "NO")
+  {
+    listeTotale_2D_a_utiliser <- allMetabolitesList
+    d1.ppm <- allMetabolitesList$ppm1 
+    d2.ppm <- allMetabolitesList$ppm2
+  }else{
+    listeTotale_2D_a_utiliser <- unicityAllList
+    d1.ppm <- listeTotale_2D_a_utiliser$ppm1 
+    d2.ppm <- listeTotale_2D_a_utiliser$ppm2
+  }
+
+  if (nrow(listeTotale_2D_a_utiliser) > 0)
+  {
+    ## Taches de correlations
+    # Matrice biologique + Annotations
+    maxX <- max(round(max(as.numeric(matriceComplexe[,2])))+0.5, round(max(as.numeric(matriceComplexe[,2]))))
+    maxY <- max(round(max(as.numeric(matriceComplexe[,3])))+indice_positif, round(max(as.numeric(matriceComplexe[,3]))))
+    probability.score <- as.factor(round(listeTotale_2D_a_utiliser[,4],2))
+    lgr <- length(unique(probability.score))
+    sp <- ggplot(matriceComplexe, aes(x=ppm1, y=ppm2))
+    sp <- sp + geom_point(size=2) + scale_x_reverse(breaks=seq(maxX, 0, -0.5)) + 
+      scale_y_reverse(breaks=seq(maxY, 0, indice_negatif)) + 
+      xlab("1H chemical shift (ppm)") + ylab(paste(atome, " chemical shift (ppm)")) + ggtitle(nom_sequence) +
+      geom_text(data=listeTotale_2D_a_utiliser, aes(d1.ppm, d2.ppm, label=str_to_lower(substr(listeTotale_2D_a_utiliser[,3],1,3)), 
+                                                    col=probability.score), 
+                size=4, hjust=0, nudge_x=0.02, vjust=0, nudge_y=0.2) + scale_colour_manual(values=viridis(lgr))
+##      scale_color_colormap('Annotation', discrete=T, reverse=T)
+    print(sp)
+  }
+  
+  # Liste des résultats (couples pmm / metabolite / score) + liste ppms metabolites communs
+  if (unicite == "NO")
+  {
+    return(list(liste_resultat=allMetabolitesList, listing_ppm_commun=listeTotale_metabo))
+  }else{
+    return(list(liste_resultat_unicite=unicityAllList, listing_ppm_commun_affichage=listeTotale_metabo))
+  }
+}
\ No newline at end of file