Mercurial > repos > marie-tremblay-metatoul > 2dnmrannotation
diff annotationRmn2D.R @ 3:546c7ccd2ed4 draft default tip
"planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 911f4beba3dcb25c1033e8239426f8f763683523"
author | workflow4metabolomics |
---|---|
date | Fri, 04 Feb 2022 09:01:11 +0000 |
parents | dff7bde22102 |
children |
line wrap: on
line diff
--- a/annotationRmn2D.R Tue Feb 04 10:59:26 2020 -0500 +++ b/annotationRmn2D.R Fri Feb 04 09:01:11 2022 +0000 @@ -1,15 +1,14 @@ -########################################################################################################################################### -# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE SEQUENCE RMN # -# matriceComplexe : data.frame liste couples ppm de la matrice a annoter # -# BdDStandards : objet contenant la base de donnees des composes standards # -# nom_séquence : nom sequence 2D a utiliser pour annotation ("JRES","COSY","TOCSY","HMBC","HSQC") # -# ppm1Tol : tolerance ppm axe abscisses # -# ppm2Tol : tolerance ppm axe ordonnees # -# nb_ligne_template : préciser le nombre total de ligne de la feuille de calcul à annoter # -########################################################################################################################################### -annotationRmn2D <- function(matriceComplexe, BdDStandards, nom_sequence, ppm1Tol=0.01, ppm2Tol=0.01, - seuil=0, unicite="NO") -{ +########################################################################## +# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE SEQUENCE RMN # +# matriceComplexe : data.frame liste couples ppm de la matrice a annoter # +# BdDStandards : objet contenant la base de donnees des composes standards # +# nom_sequence : nom sequence 2D a utiliser pour annotation ("JRES", "COSY", "TOCSY", "HMBC", "HSQC") # +# ppm1Tol : tolerance ppm axe abscisses # +# ppm2Tol : tolerance ppm axe ordonnees # +# nb_ligne_template : preciser le nombre total de ligne de la feuille de calcul a annoter # +####################################################################################################### +annotationRmn2D <- function(matriceComplexe, BdDStandards, nom_sequence, ppm1Tol = 0.01, ppm2Tol = 0.01, + seuil = 0, unicite = "NO") { ## Longueur de la peak-list de la matrice a annoter PeakListLength <- length(matriceComplexe[, 1]) @@ -18,17 +17,16 @@ matrixAnnotation <- data.frame() allMetabolitesList <- data.frame() seuil_score <- seuil - + ## Boucle sur les metabolites inclus dans BdD - for (i in 1:nbMetabolitesBdD) - { + for (i in seq_len(nbMetabolitesBdD)) { ## Infos metabolite en cours iMetabolite <- BdDStandards[[i]] - ppm1M <- iMetabolite[,1] - ppm2M <- iMetabolite[,2] + ppm1M <- iMetabolite[, 1] + ppm2M <- iMetabolite[, 2] nbPeakMetabolite <- length(ppm1M) MetaboliteName <- names(BdDStandards[i]) -## print(MetaboliteName) + ## Initialisation k <- 0 presenceScore <- 0 @@ -37,65 +35,59 @@ annotatedPeakLength <- 0 metabolites <- data.frame() metabolitesList <- data.frame() - + ## Boucle sur les couples de pics de la matrice a annoter - for (p in 1:PeakListLength) - { + for (p in seq_len(PeakListLength)) { ppmAnnotationF1 <- as.numeric(matriceComplexe[p, 3]) ppmAnnotationF2 <- as.numeric(matriceComplexe[p, 2]) e <- simpleMessage("end of file") tryCatch({ - if (!is.na(ppmAnnotationF1)) - { + if (!is.na(ppmAnnotationF1)) { matrixAnnotation <- unique.data.frame(rbind.data.frame(matrixAnnotation, matriceComplexe[p, ])) } # Recherche du couple de pics de la matrice la liste des couples du metabolite standard - metaboliteIn <- (ppm1M >= (ppmAnnotationF2-ppm1Tol) & ppm1M <= (ppmAnnotationF2+ppm1Tol) & - ppm2M >= (ppmAnnotationF1-ppm2Tol) & ppm2M <= (ppmAnnotationF1+ppm2Tol)) + metaboliteIn <- (ppm1M >= (ppmAnnotationF2 - ppm1Tol) & ppm1M <= (ppmAnnotationF2 + ppm1Tol) & + ppm2M >= (ppmAnnotationF1 - ppm2Tol) & ppm2M <= (ppmAnnotationF1 + ppm2Tol)) WhichMetaboliteIn <- which(metaboliteIn) # Si au moins un couple de la matrice a annoter dans liste couples metabolite standard - if (length(WhichMetaboliteIn) > 0) - { - for (a in 1:length(WhichMetaboliteIn)) - { - annotatedPpmList <- data.frame(ppm1=ppm1M[WhichMetaboliteIn[a]], ppm2=ppm2M[WhichMetaboliteIn[a]], theoricalLength=nbPeakMetabolite) - annotatedPpmRef <- rbind(annotatedPpmRef,annotatedPpmList) + if (length(WhichMetaboliteIn) > 0) { + for (a in seq_len(length(WhichMetaboliteIn))) { + annotatedPpmList <- data.frame(ppm1 = ppm1M[WhichMetaboliteIn[a]], ppm2 = ppm2M[WhichMetaboliteIn[a]], theoricalLength = nbPeakMetabolite) + annotatedPpmRef <- rbind(annotatedPpmRef, annotatedPpmList) } } - }, error=function(e){cat ("End of file \n");}) + }, error = function(e) { + cat("End of file \n"); + }) } # Au - 1 couple de ppm de la matrice complexe annote - if (nrow(annotatedPpmRef) >= 1) - { + if (nrow(annotatedPpmRef) >= 1) { ## Nombre couples annotes annotatedPeakLength <- nrow(annotatedPpmRef) - + ## Recherche doublons annotatedDoublons <- duplicated(annotatedPpmRef) - if (sum(duplicated(annotatedPpmRef)) > 0) - { + if (sum(duplicated(annotatedPpmRef)) > 0) { annotatedPeakLength <- nrow(annotatedPpmRef) - sum(duplicated(annotatedPpmRef)) annotatedPpmRef <- annotatedPpmRef[-duplicated(annotatedPpmRef), ] } - presenceScore <- annotatedPeakLength/nbPeakMetabolite + presenceScore <- round(annotatedPeakLength / nbPeakMetabolite, 2) } - + ## Conservation metabolites dont score > seuil - if (presenceScore > seuil_score) - { - metabolites <- data.frame(Metabolite=MetaboliteName, score=presenceScore) - metabolitesList <- cbind.data.frame(annotatedPpmRef, metabolites) + if (presenceScore > seuil_score) { + metabolites <- data.frame(Metabolite = MetaboliteName, score = presenceScore) + metabolitesList <- cbind.data.frame(annotatedPpmRef, metabolites) allMetabolitesList <- rbind.data.frame(allMetabolitesList, metabolitesList) } } - + # Initialisation commonPpm <- data.frame() commonPpmList <- data.frame() metaboliteAdd <- data.frame() metaboliteAddList <- data.frame() -# metabolite_ref <- data.frame() commonMetabolitesList <- data.frame() commonMetabolitesPpmList <- data.frame() commonMetabolitesPpmAllList1 <- data.frame() @@ -103,34 +95,30 @@ listeTotale_2D_unicite <- allMetabolitesList[, 1:4] allMetabolitesList <- allMetabolitesList[, -3] metabolitesAllUnicite <- data.frame() - + ## Boucle sur tous couples annotes - for (j in 1:length(allMetabolitesList$ppm1)) - { + for (j in seq_len(length(allMetabolitesList$ppm1))) { ## Boucle sur metabolites dans BdD composes standards - for (i in 1:nbMetabolitesBdD) - { + for (i in seq_len(nbMetabolitesBdD)) { ppmMetaboliteBdD <- BdDStandards[[i]] - ppm1M <- ppmMetaboliteBdD[,1] - ppm2M <- ppmMetaboliteBdD[,2] + ppm1M <- ppmMetaboliteBdD[, 1] + ppm2M <- ppmMetaboliteBdD[, 2] # Nombre de couples metabolite nbPeakMetabolite <- length(ppm1M) MetaboliteName <- names(BdDStandards[i]) - metabolitesInAll <- (ppm1M >= (allMetabolitesList[j,1]-ppm1Tol) & ppm1M <= (allMetabolitesList[j,1]+ppm1Tol) & - ppm2M >= (allMetabolitesList[j,2]-ppm2Tol) & ppm2M <= (allMetabolitesList[j,2]+ppm2Tol)) + metabolitesInAll <- (ppm1M >= (allMetabolitesList[j, 1] - ppm1Tol) & ppm1M <= (allMetabolitesList[j, 1] + ppm1Tol) & + ppm2M >= (allMetabolitesList[j, 2] - ppm2Tol) & ppm2M <= (allMetabolitesList[j, 2] + ppm2Tol)) WhichMetabolitesInAll <- which(metabolitesInAll) - if (MetaboliteName != allMetabolitesList[j, 3] & length(WhichMetabolitesInAll) > 0) - { - metabolitesAllUnicite <- rbind.data.frame(metabolitesAllUnicite, listeTotale_2D_unicite[j,]) - commonPpm <- data.frame(ppm1=allMetabolitesList[j,1], ppm2=allMetabolitesList[j,2]) + if (MetaboliteName != allMetabolitesList[j, 3] & length(WhichMetabolitesInAll) > 0) { + metabolitesAllUnicite <- rbind.data.frame(metabolitesAllUnicite, listeTotale_2D_unicite[j, ]) + commonPpm <- data.frame(ppm1 = allMetabolitesList[j, 1], ppm2 = allMetabolitesList[j, 2]) commonPpmList <- rbind.data.frame(commonPpmList, commonPpm) commonPpmList <- unique(commonPpmList) - metaboliteAdd <- data.frame(nom_metabolite=MetaboliteName) + metaboliteAdd <- data.frame(nom_metabolite = MetaboliteName) metaboliteAddList <- rbind.data.frame(metaboliteAddList, metaboliteAdd) -# metabolite_ref <- data.frame(nom_metabolite=allMetabolitesList[j,3]) - commonMetabolitesList <- rbind.data.frame(data.frame(nom_metabolite=allMetabolitesList[j, 3]), metaboliteAddList) + commonMetabolitesList <- rbind.data.frame(data.frame(nom_metabolite = allMetabolitesList[j, 3]), metaboliteAddList) commonMetabolitesPpmList <- cbind.data.frame(commonPpm, commonMetabolitesList) commonMetabolitesPpmAllList1 <- rbind.data.frame(commonMetabolitesPpmAllList1, commonMetabolitesPpmList) commonMetabolitesPpmAllList1 <- unique.data.frame(commonMetabolitesPpmAllList1) @@ -138,7 +126,7 @@ } commonMetabolitesPpmAllList <- rbind.data.frame(commonMetabolitesPpmAllList, commonMetabolitesPpmAllList1) commonMetabolitesPpmAllList <- unique.data.frame(commonMetabolitesPpmAllList) - + #initialisation des data.frame commonPpm <- data.frame() metaboliteAdd <- data.frame() @@ -150,12 +138,11 @@ } unicityAllList <- listeTotale_2D_unicite - if (nrow(listeTotale_2D_unicite)!=0 & nrow(metabolitesAllUnicite)!=0) + if (nrow(listeTotale_2D_unicite) != 0 & nrow(metabolitesAllUnicite) != 0) unicityAllList <- setdiff(listeTotale_2D_unicite, metabolitesAllUnicite) unicitynbCouplesRectif <- data.frame() - for (g in 1:nrow(unicityAllList)) - { + for (g in seq_len(nrow(unicityAllList))) { metaboliteUnicity <- (unicityAllList$Metabolite == unicityAllList$Metabolite[g]) WhichMetaboliteUnicity <- which(metaboliteUnicity) nb_occurence <- length(WhichMetaboliteUnicity) @@ -163,84 +150,74 @@ } names(unicitynbCouplesRectif) <- "NbCouplesAnnotes" unicityAllList <- cbind.data.frame(unicityAllList, unicitynbCouplesRectif) - - unicityAllList <- cbind.data.frame(unicityAllList, score_unicite=unicityAllList$NbCouplesAnnotes/unicityAllList$theoricalLength) + + unicityAllList <- cbind.data.frame(unicityAllList, score_unicite = unicityAllList$NbCouplesAnnotes / unicityAllList$theoricalLength) unicityAllList <- unicityAllList[, -3] unicityAllList <- unicityAllList[, -4] -## unicityAllList <- filter(unicityAllList, unicityAllList$score_unicite > seuil_score) - unicityAllList <- unicityAllList[unicityAllList$score_unicite > seuil_score,] + unicityAllList <- unicityAllList[unicityAllList$score_unicite > seuil_score, ] listeTotale_metabo <- data.frame() - if (nrow(commonPpmList) !=0) - { - for (o in 1:length(commonPpmList[, 1])) - { - tf6 <- (commonMetabolitesPpmAllList$ppm1 == commonPpmList[o,1] & commonMetabolitesPpmAllList$ppm2 == commonPpmList[o,2]) - w6 <- which(tf6) - - for (s in 1:length(w6)) - { - metaboliteAdd <- data.frame(nom_metabolite=commonMetabolitesPpmAllList[w6[s],3]) - commonMetabolitesList <- paste(commonMetabolitesList, metaboliteAdd[1,], sep = " ") + if (nrow(commonPpmList) != 0) { + for (o in seq_len(length(commonPpmList[, 1]))) { + tf6 <- (commonMetabolitesPpmAllList$ppm1 == commonPpmList[o, 1] & commonMetabolitesPpmAllList$ppm2 == commonPpmList[o, 2]) + w6 <- which(tf6) + + for (s in seq_len(length(w6))) { + metaboliteAdd <- data.frame(nom_metabolite = commonMetabolitesPpmAllList[w6[s], 3]) + commonMetabolitesList <- paste(commonMetabolitesList, metaboliteAdd[1, ], sep = " ") } - liste_metabo_ppm <- cbind.data.frame(ppm1=commonPpmList[o,1],ppm2=commonPpmList[o,2], commonMetabolitesList) + liste_metabo_ppm <- cbind.data.frame(ppm1 = commonPpmList[o, 1], ppm2 = commonPpmList[o, 2], commonMetabolitesList) listeTotale_metabo <- rbind.data.frame(listeTotale_metabo, liste_metabo_ppm) commonMetabolitesList <- data.frame() } } # Representation graphique - if (nom_sequence == "HSQC" | nom_sequence == "HMBC") - { + if (nom_sequence == "HSQC" | nom_sequence == "HMBC") { atome <- "13C" indice_positif <- 1 indice_negatif <- -10 - }else{ + } else { atome <- "1H" indice_positif <- 0.5 indice_negatif <- -0.5 } - + matriceComplexe <- matrixAnnotation - ppm1 <- as.numeric(matriceComplexe[,2]) - ppm2 <- as.numeric(matriceComplexe[,3]) - - if (unicite == "NO") - { + ppm1 <- as.numeric(matriceComplexe[, 2]) + ppm2 <- as.numeric(matriceComplexe[, 3]) + + if (unicite == "NO") { listeTotale_2D_a_utiliser <- allMetabolitesList - d1.ppm <- allMetabolitesList$ppm1 + d1.ppm <- allMetabolitesList$ppm1 d2.ppm <- allMetabolitesList$ppm2 - }else{ + } else { listeTotale_2D_a_utiliser <- unicityAllList - d1.ppm <- listeTotale_2D_a_utiliser$ppm1 + d1.ppm <- listeTotale_2D_a_utiliser$ppm1 d2.ppm <- listeTotale_2D_a_utiliser$ppm2 } - if (nrow(listeTotale_2D_a_utiliser) > 0) - { + if (nrow(listeTotale_2D_a_utiliser) > 0) { ## Taches de correlations # Matrice biologique + Annotations - maxX <- max(round(max(as.numeric(matriceComplexe[,2])))+0.5, round(max(as.numeric(matriceComplexe[,2])))) - maxY <- max(round(max(as.numeric(matriceComplexe[,3])))+indice_positif, round(max(as.numeric(matriceComplexe[,3])))) - probability.score <- as.factor(round(listeTotale_2D_a_utiliser[,4],2)) + maxX <- max(round(max(as.numeric(matriceComplexe[, 2]))) + 0.5, round(max(as.numeric(matriceComplexe[, 2])))) + maxY <- max(round(max(as.numeric(matriceComplexe[, 3]))) + indice_positif, round(max(as.numeric(matriceComplexe[, 3])))) + probability.score <- as.factor(round(listeTotale_2D_a_utiliser[, 4], 2)) lgr <- length(unique(probability.score)) - sp <- ggplot(matriceComplexe, aes(x=ppm1, y=ppm2)) - sp <- sp + geom_point(size=2) + scale_x_reverse(breaks=seq(maxX, 0, -0.5)) + - scale_y_reverse(breaks=seq(maxY, 0, indice_negatif)) + + sp <- ggplot(matriceComplexe, aes(x = ppm1, y = ppm2)) + sp <- sp + geom_point(size = 2) + scale_x_reverse(breaks = seq(maxX, 0, -0.5)) + + scale_y_reverse(breaks = seq(maxY, 0, indice_negatif)) + xlab("1H chemical shift (ppm)") + ylab(paste(atome, " chemical shift (ppm)")) + ggtitle(nom_sequence) + - geom_text(data=listeTotale_2D_a_utiliser, aes(d1.ppm, d2.ppm, label=str_to_lower(substr(listeTotale_2D_a_utiliser[,3],1,3)), - col=probability.score), - size=4, hjust=0, nudge_x=0.02, vjust=0, nudge_y=0.2) + scale_colour_manual(values=viridis(lgr)) -## scale_color_colormap('Annotation', discrete=T, reverse=T) + geom_text(data = listeTotale_2D_a_utiliser, aes(d1.ppm, d2.ppm, label = str_to_lower(substr(listeTotale_2D_a_utiliser[, 3], 1, 3)), col = probability.score), + size = 4, hjust = 0, nudge_x = 0.02, vjust = 0, nudge_y = 0.2) + scale_colour_manual(values = viridis(lgr)) print(sp) } - - # Liste des résultats (couples pmm / metabolite / score) + liste ppms metabolites communs - if (unicite == "NO") - { - return(list(liste_resultat=allMetabolitesList, listing_ppm_commun=listeTotale_metabo)) - }else{ - return(list(liste_resultat_unicite=unicityAllList, listing_ppm_commun_affichage=listeTotale_metabo)) + + # Liste des resultats (couples pmm / metabolite / score) + liste ppms metabolites communs + if (unicite == "NO") { + return(list(liste_resultat = allMetabolitesList, listing_ppm_commun = listeTotale_metabo)) + } else { + return(list(liste_resultat_unicite = unicityAllList, listing_ppm_commun_affichage = listeTotale_metabo)) } -} \ No newline at end of file +}