diff annotationRmn2D.R @ 3:546c7ccd2ed4 draft default tip

"planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 911f4beba3dcb25c1033e8239426f8f763683523"
author workflow4metabolomics
date Fri, 04 Feb 2022 09:01:11 +0000
parents dff7bde22102
children
line wrap: on
line diff
--- a/annotationRmn2D.R	Tue Feb 04 10:59:26 2020 -0500
+++ b/annotationRmn2D.R	Fri Feb 04 09:01:11 2022 +0000
@@ -1,15 +1,14 @@
-###########################################################################################################################################
-# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE SEQUENCE RMN                                                                       #
-# matriceComplexe : data.frame liste couples ppm de la matrice a annoter                                                                  #
-# BdDStandards : objet contenant la base de donnees des composes standards                                                                #
-# nom_séquence : nom sequence 2D a utiliser pour annotation ("JRES","COSY","TOCSY","HMBC","HSQC")                                         #
-# ppm1Tol : tolerance ppm axe abscisses                                                                                                   #
-# ppm2Tol : tolerance ppm axe ordonnees                                                                                                   #
-# nb_ligne_template : préciser le nombre total de ligne de la feuille de calcul à annoter                                                 #
-###########################################################################################################################################
-annotationRmn2D <- function(matriceComplexe, BdDStandards, nom_sequence, ppm1Tol=0.01, ppm2Tol=0.01, 
-                            seuil=0, unicite="NO")
-{
+##########################################################################
+# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE SEQUENCE RMN      #
+# matriceComplexe : data.frame liste couples ppm de la matrice a annoter #
+# BdDStandards : objet contenant la base de donnees des composes standards #
+# nom_sequence : nom sequence 2D a utiliser pour annotation ("JRES", "COSY", "TOCSY", "HMBC", "HSQC") #
+# ppm1Tol : tolerance ppm axe abscisses                                                               #
+# ppm2Tol : tolerance ppm axe ordonnees                                                               #
+# nb_ligne_template : preciser le nombre total de ligne de la feuille de calcul a annoter             #
+#######################################################################################################
+annotationRmn2D <- function(matriceComplexe, BdDStandards, nom_sequence, ppm1Tol = 0.01, ppm2Tol = 0.01,
+                            seuil = 0, unicite = "NO") {
   ## Longueur de la peak-list de la matrice a annoter
   PeakListLength <- length(matriceComplexe[, 1])
 
@@ -18,17 +17,16 @@
   matrixAnnotation <- data.frame()
   allMetabolitesList <- data.frame()
   seuil_score <- seuil
-  
+
   ## Boucle sur les metabolites inclus dans BdD
-  for (i in 1:nbMetabolitesBdD)
-  {
+  for (i in seq_len(nbMetabolitesBdD)) {
     ## Infos metabolite en cours
     iMetabolite <- BdDStandards[[i]]
-    ppm1M <- iMetabolite[,1] 
-    ppm2M <- iMetabolite[,2]
+    ppm1M <- iMetabolite[, 1]
+    ppm2M <- iMetabolite[, 2]
     nbPeakMetabolite <- length(ppm1M)
     MetaboliteName <- names(BdDStandards[i])
-##    print(MetaboliteName)
+
     ## Initialisation
     k <- 0
     presenceScore <- 0
@@ -37,65 +35,59 @@
     annotatedPeakLength <- 0
     metabolites <- data.frame()
     metabolitesList <- data.frame()
-  
+
     ## Boucle sur les couples de pics de la matrice a annoter
-    for (p in 1:PeakListLength)
-    { 
+    for (p in seq_len(PeakListLength)) {
       ppmAnnotationF1 <- as.numeric(matriceComplexe[p, 3])
       ppmAnnotationF2 <- as.numeric(matriceComplexe[p, 2])
       e <- simpleMessage("end of file")
       tryCatch({
-        if (!is.na(ppmAnnotationF1))
-        {
+        if (!is.na(ppmAnnotationF1)) {
           matrixAnnotation <- unique.data.frame(rbind.data.frame(matrixAnnotation, matriceComplexe[p, ]))
         }
         # Recherche du couple de pics de la matrice la liste des couples du metabolite standard
-        metaboliteIn <- (ppm1M >= (ppmAnnotationF2-ppm1Tol) & ppm1M <= (ppmAnnotationF2+ppm1Tol) & 
-                     ppm2M >= (ppmAnnotationF1-ppm2Tol) & ppm2M <= (ppmAnnotationF1+ppm2Tol))
+        metaboliteIn <- (ppm1M >= (ppmAnnotationF2 - ppm1Tol) & ppm1M <= (ppmAnnotationF2 + ppm1Tol) &
+                     ppm2M >= (ppmAnnotationF1 - ppm2Tol) & ppm2M <= (ppmAnnotationF1 + ppm2Tol))
         WhichMetaboliteIn <- which(metaboliteIn)
         # Si au moins un couple de la matrice a annoter dans liste couples metabolite standard
-        if (length(WhichMetaboliteIn) > 0)
-        {
-          for (a in 1:length(WhichMetaboliteIn))
-          {
-            annotatedPpmList <- data.frame(ppm1=ppm1M[WhichMetaboliteIn[a]], ppm2=ppm2M[WhichMetaboliteIn[a]], theoricalLength=nbPeakMetabolite)
-            annotatedPpmRef <- rbind(annotatedPpmRef,annotatedPpmList)
+        if (length(WhichMetaboliteIn) > 0) {
+          for (a in seq_len(length(WhichMetaboliteIn))) {
+            annotatedPpmList <- data.frame(ppm1 = ppm1M[WhichMetaboliteIn[a]], ppm2 = ppm2M[WhichMetaboliteIn[a]], theoricalLength = nbPeakMetabolite)
+            annotatedPpmRef <- rbind(annotatedPpmRef, annotatedPpmList)
           }
         }
-      }, error=function(e){cat ("End of file \n");})
+      }, error = function(e) {
+        cat("End of file \n");
+      })
     }
 
     # Au - 1 couple de ppm de la matrice complexe annote
-    if (nrow(annotatedPpmRef) >= 1)
-    {
+    if (nrow(annotatedPpmRef) >= 1) {
       ## Nombre couples annotes
       annotatedPeakLength <- nrow(annotatedPpmRef)
-      
+
       ## Recherche doublons
       annotatedDoublons <- duplicated(annotatedPpmRef)
-      if (sum(duplicated(annotatedPpmRef)) > 0)
-      {
+      if (sum(duplicated(annotatedPpmRef)) > 0) {
         annotatedPeakLength <- nrow(annotatedPpmRef) - sum(duplicated(annotatedPpmRef))
         annotatedPpmRef <- annotatedPpmRef[-duplicated(annotatedPpmRef), ]
       }
-      presenceScore <- annotatedPeakLength/nbPeakMetabolite
+      presenceScore <- round(annotatedPeakLength / nbPeakMetabolite, 2)
     }
-    
+
     ## Conservation metabolites dont score > seuil
-    if (presenceScore > seuil_score)
-    {
-      metabolites <- data.frame(Metabolite=MetaboliteName, score=presenceScore)
-      metabolitesList <- cbind.data.frame(annotatedPpmRef, metabolites) 
+    if (presenceScore > seuil_score) {
+      metabolites <- data.frame(Metabolite = MetaboliteName, score = presenceScore)
+      metabolitesList <- cbind.data.frame(annotatedPpmRef, metabolites)
       allMetabolitesList <- rbind.data.frame(allMetabolitesList, metabolitesList)
     }
   }
-  
+
   # Initialisation
   commonPpm <- data.frame()
   commonPpmList <- data.frame()
   metaboliteAdd <- data.frame()
   metaboliteAddList <- data.frame()
-#  metabolite_ref <- data.frame()
   commonMetabolitesList <- data.frame()
   commonMetabolitesPpmList <- data.frame()
   commonMetabolitesPpmAllList1 <- data.frame()
@@ -103,34 +95,30 @@
   listeTotale_2D_unicite <- allMetabolitesList[, 1:4]
   allMetabolitesList <- allMetabolitesList[, -3]
   metabolitesAllUnicite <- data.frame()
-  
+
   ## Boucle sur tous couples annotes
-  for (j in 1:length(allMetabolitesList$ppm1))
-  {
+  for (j in seq_len(length(allMetabolitesList$ppm1))) {
     ## Boucle sur metabolites dans BdD composes standards
-    for (i in 1:nbMetabolitesBdD)
-    {
+    for (i in seq_len(nbMetabolitesBdD)) {
       ppmMetaboliteBdD <- BdDStandards[[i]]
-      ppm1M <- ppmMetaboliteBdD[,1] 
-      ppm2M <- ppmMetaboliteBdD[,2]
+      ppm1M <- ppmMetaboliteBdD[, 1]
+      ppm2M <- ppmMetaboliteBdD[, 2]
       # Nombre de couples metabolite
       nbPeakMetabolite <- length(ppm1M)
       MetaboliteName <- names(BdDStandards[i])
 
-      metabolitesInAll <- (ppm1M >= (allMetabolitesList[j,1]-ppm1Tol) & ppm1M <= (allMetabolitesList[j,1]+ppm1Tol) & 
-                            ppm2M >= (allMetabolitesList[j,2]-ppm2Tol) & ppm2M <= (allMetabolitesList[j,2]+ppm2Tol))
+      metabolitesInAll <- (ppm1M >= (allMetabolitesList[j, 1] - ppm1Tol) & ppm1M <= (allMetabolitesList[j, 1] + ppm1Tol) &
+                            ppm2M >= (allMetabolitesList[j, 2] - ppm2Tol) & ppm2M <= (allMetabolitesList[j, 2] + ppm2Tol))
       WhichMetabolitesInAll <- which(metabolitesInAll)
 
-      if (MetaboliteName != allMetabolitesList[j, 3] & length(WhichMetabolitesInAll) > 0)
-      {
-        metabolitesAllUnicite <- rbind.data.frame(metabolitesAllUnicite, listeTotale_2D_unicite[j,])
-        commonPpm <- data.frame(ppm1=allMetabolitesList[j,1], ppm2=allMetabolitesList[j,2])
+      if (MetaboliteName != allMetabolitesList[j, 3] & length(WhichMetabolitesInAll) > 0) {
+        metabolitesAllUnicite <- rbind.data.frame(metabolitesAllUnicite, listeTotale_2D_unicite[j, ])
+        commonPpm <- data.frame(ppm1 = allMetabolitesList[j, 1], ppm2 = allMetabolitesList[j, 2])
         commonPpmList <- rbind.data.frame(commonPpmList, commonPpm)
         commonPpmList <- unique(commonPpmList)
-        metaboliteAdd <- data.frame(nom_metabolite=MetaboliteName)
+        metaboliteAdd <- data.frame(nom_metabolite = MetaboliteName)
         metaboliteAddList <- rbind.data.frame(metaboliteAddList, metaboliteAdd)
-#        metabolite_ref <- data.frame(nom_metabolite=allMetabolitesList[j,3])
-        commonMetabolitesList <- rbind.data.frame(data.frame(nom_metabolite=allMetabolitesList[j, 3]), metaboliteAddList)
+        commonMetabolitesList <- rbind.data.frame(data.frame(nom_metabolite = allMetabolitesList[j, 3]), metaboliteAddList)
         commonMetabolitesPpmList <- cbind.data.frame(commonPpm, commonMetabolitesList)
         commonMetabolitesPpmAllList1 <- rbind.data.frame(commonMetabolitesPpmAllList1, commonMetabolitesPpmList)
         commonMetabolitesPpmAllList1 <- unique.data.frame(commonMetabolitesPpmAllList1)
@@ -138,7 +126,7 @@
     }
     commonMetabolitesPpmAllList <- rbind.data.frame(commonMetabolitesPpmAllList, commonMetabolitesPpmAllList1)
     commonMetabolitesPpmAllList <- unique.data.frame(commonMetabolitesPpmAllList)
-    
+
     #initialisation des data.frame
     commonPpm <- data.frame()
     metaboliteAdd <- data.frame()
@@ -150,12 +138,11 @@
   }
 
   unicityAllList <- listeTotale_2D_unicite
-  if (nrow(listeTotale_2D_unicite)!=0 & nrow(metabolitesAllUnicite)!=0)
+  if (nrow(listeTotale_2D_unicite) != 0 & nrow(metabolitesAllUnicite) != 0)
     unicityAllList <- setdiff(listeTotale_2D_unicite, metabolitesAllUnicite)
 
   unicitynbCouplesRectif <- data.frame()
-  for (g in 1:nrow(unicityAllList))
-  {
+  for (g in seq_len(nrow(unicityAllList))) {
     metaboliteUnicity <- (unicityAllList$Metabolite == unicityAllList$Metabolite[g])
     WhichMetaboliteUnicity <- which(metaboliteUnicity)
     nb_occurence <- length(WhichMetaboliteUnicity)
@@ -163,84 +150,74 @@
   }
   names(unicitynbCouplesRectif) <- "NbCouplesAnnotes"
   unicityAllList <- cbind.data.frame(unicityAllList, unicitynbCouplesRectif)
-  
-  unicityAllList <- cbind.data.frame(unicityAllList, score_unicite=unicityAllList$NbCouplesAnnotes/unicityAllList$theoricalLength)
+
+  unicityAllList <- cbind.data.frame(unicityAllList, score_unicite = unicityAllList$NbCouplesAnnotes / unicityAllList$theoricalLength)
   unicityAllList <- unicityAllList[, -3]
   unicityAllList <- unicityAllList[, -4]
 
-##  unicityAllList <- filter(unicityAllList, unicityAllList$score_unicite > seuil_score)
-  unicityAllList <- unicityAllList[unicityAllList$score_unicite > seuil_score,]
+  unicityAllList <- unicityAllList[unicityAllList$score_unicite > seuil_score, ]
 
   listeTotale_metabo <- data.frame()
-  if (nrow(commonPpmList) !=0)
-  {
-    for (o in 1:length(commonPpmList[, 1]))
-    {
-      tf6 <- (commonMetabolitesPpmAllList$ppm1 == commonPpmList[o,1] & commonMetabolitesPpmAllList$ppm2 == commonPpmList[o,2])
-      w6 <- which(tf6) 
-      
-      for (s in 1:length(w6))
-      {
-        metaboliteAdd <- data.frame(nom_metabolite=commonMetabolitesPpmAllList[w6[s],3])
-        commonMetabolitesList <- paste(commonMetabolitesList, metaboliteAdd[1,], sep = " ")
+  if (nrow(commonPpmList) != 0) {
+    for (o in seq_len(length(commonPpmList[, 1]))) {
+      tf6 <- (commonMetabolitesPpmAllList$ppm1 == commonPpmList[o, 1] & commonMetabolitesPpmAllList$ppm2 == commonPpmList[o, 2])
+      w6 <- which(tf6)
+
+      for (s in seq_len(length(w6))) {
+        metaboliteAdd <- data.frame(nom_metabolite = commonMetabolitesPpmAllList[w6[s], 3])
+        commonMetabolitesList <- paste(commonMetabolitesList, metaboliteAdd[1, ], sep = " ")
       }
-      liste_metabo_ppm <- cbind.data.frame(ppm1=commonPpmList[o,1],ppm2=commonPpmList[o,2], commonMetabolitesList)
+      liste_metabo_ppm <- cbind.data.frame(ppm1 = commonPpmList[o, 1], ppm2 = commonPpmList[o, 2], commonMetabolitesList)
       listeTotale_metabo <- rbind.data.frame(listeTotale_metabo, liste_metabo_ppm)
       commonMetabolitesList <- data.frame()
     }
   }
 
   # Representation graphique
-  if (nom_sequence == "HSQC" | nom_sequence == "HMBC")
-  {
+  if (nom_sequence == "HSQC" | nom_sequence == "HMBC") {
     atome <- "13C"
     indice_positif <- 1
     indice_negatif <- -10
-  }else{
+  } else {
     atome <- "1H"
     indice_positif <- 0.5
     indice_negatif <- -0.5
   }
-  
+
   matriceComplexe <- matrixAnnotation
-  ppm1 <- as.numeric(matriceComplexe[,2])
-  ppm2 <- as.numeric(matriceComplexe[,3])
-  
-  if (unicite == "NO")
-  {
+  ppm1 <- as.numeric(matriceComplexe[, 2])
+  ppm2 <- as.numeric(matriceComplexe[, 3])
+
+  if (unicite == "NO") {
     listeTotale_2D_a_utiliser <- allMetabolitesList
-    d1.ppm <- allMetabolitesList$ppm1 
+    d1.ppm <- allMetabolitesList$ppm1
     d2.ppm <- allMetabolitesList$ppm2
-  }else{
+  } else {
     listeTotale_2D_a_utiliser <- unicityAllList
-    d1.ppm <- listeTotale_2D_a_utiliser$ppm1 
+    d1.ppm <- listeTotale_2D_a_utiliser$ppm1
     d2.ppm <- listeTotale_2D_a_utiliser$ppm2
   }
 
-  if (nrow(listeTotale_2D_a_utiliser) > 0)
-  {
+  if (nrow(listeTotale_2D_a_utiliser) > 0) {
     ## Taches de correlations
     # Matrice biologique + Annotations
-    maxX <- max(round(max(as.numeric(matriceComplexe[,2])))+0.5, round(max(as.numeric(matriceComplexe[,2]))))
-    maxY <- max(round(max(as.numeric(matriceComplexe[,3])))+indice_positif, round(max(as.numeric(matriceComplexe[,3]))))
-    probability.score <- as.factor(round(listeTotale_2D_a_utiliser[,4],2))
+    maxX <- max(round(max(as.numeric(matriceComplexe[, 2]))) + 0.5, round(max(as.numeric(matriceComplexe[, 2]))))
+    maxY <- max(round(max(as.numeric(matriceComplexe[, 3]))) + indice_positif, round(max(as.numeric(matriceComplexe[, 3]))))
+    probability.score <- as.factor(round(listeTotale_2D_a_utiliser[, 4], 2))
     lgr <- length(unique(probability.score))
-    sp <- ggplot(matriceComplexe, aes(x=ppm1, y=ppm2))
-    sp <- sp + geom_point(size=2) + scale_x_reverse(breaks=seq(maxX, 0, -0.5)) + 
-      scale_y_reverse(breaks=seq(maxY, 0, indice_negatif)) + 
+    sp <- ggplot(matriceComplexe, aes(x = ppm1, y = ppm2))
+    sp <- sp + geom_point(size = 2) + scale_x_reverse(breaks = seq(maxX, 0, -0.5)) +
+      scale_y_reverse(breaks = seq(maxY, 0, indice_negatif)) +
       xlab("1H chemical shift (ppm)") + ylab(paste(atome, " chemical shift (ppm)")) + ggtitle(nom_sequence) +
-      geom_text(data=listeTotale_2D_a_utiliser, aes(d1.ppm, d2.ppm, label=str_to_lower(substr(listeTotale_2D_a_utiliser[,3],1,3)), 
-                                                    col=probability.score), 
-                size=4, hjust=0, nudge_x=0.02, vjust=0, nudge_y=0.2) + scale_colour_manual(values=viridis(lgr))
-##      scale_color_colormap('Annotation', discrete=T, reverse=T)
+      geom_text(data = listeTotale_2D_a_utiliser, aes(d1.ppm, d2.ppm, label = str_to_lower(substr(listeTotale_2D_a_utiliser[, 3], 1, 3)), col = probability.score),
+                size = 4, hjust = 0, nudge_x = 0.02, vjust = 0, nudge_y = 0.2) + scale_colour_manual(values = viridis(lgr))
     print(sp)
   }
-  
-  # Liste des résultats (couples pmm / metabolite / score) + liste ppms metabolites communs
-  if (unicite == "NO")
-  {
-    return(list(liste_resultat=allMetabolitesList, listing_ppm_commun=listeTotale_metabo))
-  }else{
-    return(list(liste_resultat_unicite=unicityAllList, listing_ppm_commun_affichage=listeTotale_metabo))
+
+   # Liste des resultats (couples pmm / metabolite / score) + liste ppms metabolites communs
+  if (unicite == "NO") {
+    return(list(liste_resultat = allMetabolitesList, listing_ppm_commun = listeTotale_metabo))
+  } else {
+    return(list(liste_resultat_unicite = unicityAllList, listing_ppm_commun_affichage = listeTotale_metabo))
   }
-}
\ No newline at end of file
+}