Mercurial > repos > marie-tremblay-metatoul > 2dnmrannotation
diff annotationRmn2D.R @ 2:dff7bde22102 draft
"planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit b3abcb650e9b38458aa0ac5f7d838811d982ff65"
author | workflow4metabolomics |
---|---|
date | Tue, 04 Feb 2020 10:59:26 -0500 |
parents | |
children | 546c7ccd2ed4 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/annotationRmn2D.R Tue Feb 04 10:59:26 2020 -0500 @@ -0,0 +1,246 @@ +########################################################################################################################################### +# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE SEQUENCE RMN # +# matriceComplexe : data.frame liste couples ppm de la matrice a annoter # +# BdDStandards : objet contenant la base de donnees des composes standards # +# nom_séquence : nom sequence 2D a utiliser pour annotation ("JRES","COSY","TOCSY","HMBC","HSQC") # +# ppm1Tol : tolerance ppm axe abscisses # +# ppm2Tol : tolerance ppm axe ordonnees # +# nb_ligne_template : préciser le nombre total de ligne de la feuille de calcul à annoter # +########################################################################################################################################### +annotationRmn2D <- function(matriceComplexe, BdDStandards, nom_sequence, ppm1Tol=0.01, ppm2Tol=0.01, + seuil=0, unicite="NO") +{ + ## Longueur de la peak-list de la matrice a annoter + PeakListLength <- length(matriceComplexe[, 1]) + + ## Nombre de metabolites inclus dans BdD de composes standards + nbMetabolitesBdD <- length(BdDStandards) + matrixAnnotation <- data.frame() + allMetabolitesList <- data.frame() + seuil_score <- seuil + + ## Boucle sur les metabolites inclus dans BdD + for (i in 1:nbMetabolitesBdD) + { + ## Infos metabolite en cours + iMetabolite <- BdDStandards[[i]] + ppm1M <- iMetabolite[,1] + ppm2M <- iMetabolite[,2] + nbPeakMetabolite <- length(ppm1M) + MetaboliteName <- names(BdDStandards[i]) +## print(MetaboliteName) + ## Initialisation + k <- 0 + presenceScore <- 0 + annotatedPpmRef <- data.frame() + annotatedPpmList <- data.frame() + annotatedPeakLength <- 0 + metabolites <- data.frame() + metabolitesList <- data.frame() + + ## Boucle sur les couples de pics de la matrice a annoter + for (p in 1:PeakListLength) + { + ppmAnnotationF1 <- as.numeric(matriceComplexe[p, 3]) + ppmAnnotationF2 <- as.numeric(matriceComplexe[p, 2]) + e <- simpleMessage("end of file") + tryCatch({ + if (!is.na(ppmAnnotationF1)) + { + matrixAnnotation <- unique.data.frame(rbind.data.frame(matrixAnnotation, matriceComplexe[p, ])) + } + # Recherche du couple de pics de la matrice la liste des couples du metabolite standard + metaboliteIn <- (ppm1M >= (ppmAnnotationF2-ppm1Tol) & ppm1M <= (ppmAnnotationF2+ppm1Tol) & + ppm2M >= (ppmAnnotationF1-ppm2Tol) & ppm2M <= (ppmAnnotationF1+ppm2Tol)) + WhichMetaboliteIn <- which(metaboliteIn) + # Si au moins un couple de la matrice a annoter dans liste couples metabolite standard + if (length(WhichMetaboliteIn) > 0) + { + for (a in 1:length(WhichMetaboliteIn)) + { + annotatedPpmList <- data.frame(ppm1=ppm1M[WhichMetaboliteIn[a]], ppm2=ppm2M[WhichMetaboliteIn[a]], theoricalLength=nbPeakMetabolite) + annotatedPpmRef <- rbind(annotatedPpmRef,annotatedPpmList) + } + } + }, error=function(e){cat ("End of file \n");}) + } + + # Au - 1 couple de ppm de la matrice complexe annote + if (nrow(annotatedPpmRef) >= 1) + { + ## Nombre couples annotes + annotatedPeakLength <- nrow(annotatedPpmRef) + + ## Recherche doublons + annotatedDoublons <- duplicated(annotatedPpmRef) + if (sum(duplicated(annotatedPpmRef)) > 0) + { + annotatedPeakLength <- nrow(annotatedPpmRef) - sum(duplicated(annotatedPpmRef)) + annotatedPpmRef <- annotatedPpmRef[-duplicated(annotatedPpmRef), ] + } + presenceScore <- annotatedPeakLength/nbPeakMetabolite + } + + ## Conservation metabolites dont score > seuil + if (presenceScore > seuil_score) + { + metabolites <- data.frame(Metabolite=MetaboliteName, score=presenceScore) + metabolitesList <- cbind.data.frame(annotatedPpmRef, metabolites) + allMetabolitesList <- rbind.data.frame(allMetabolitesList, metabolitesList) + } + } + + # Initialisation + commonPpm <- data.frame() + commonPpmList <- data.frame() + metaboliteAdd <- data.frame() + metaboliteAddList <- data.frame() +# metabolite_ref <- data.frame() + commonMetabolitesList <- data.frame() + commonMetabolitesPpmList <- data.frame() + commonMetabolitesPpmAllList1 <- data.frame() + commonMetabolitesPpmAllList <- data.frame() + listeTotale_2D_unicite <- allMetabolitesList[, 1:4] + allMetabolitesList <- allMetabolitesList[, -3] + metabolitesAllUnicite <- data.frame() + + ## Boucle sur tous couples annotes + for (j in 1:length(allMetabolitesList$ppm1)) + { + ## Boucle sur metabolites dans BdD composes standards + for (i in 1:nbMetabolitesBdD) + { + ppmMetaboliteBdD <- BdDStandards[[i]] + ppm1M <- ppmMetaboliteBdD[,1] + ppm2M <- ppmMetaboliteBdD[,2] + # Nombre de couples metabolite + nbPeakMetabolite <- length(ppm1M) + MetaboliteName <- names(BdDStandards[i]) + + metabolitesInAll <- (ppm1M >= (allMetabolitesList[j,1]-ppm1Tol) & ppm1M <= (allMetabolitesList[j,1]+ppm1Tol) & + ppm2M >= (allMetabolitesList[j,2]-ppm2Tol) & ppm2M <= (allMetabolitesList[j,2]+ppm2Tol)) + WhichMetabolitesInAll <- which(metabolitesInAll) + + if (MetaboliteName != allMetabolitesList[j, 3] & length(WhichMetabolitesInAll) > 0) + { + metabolitesAllUnicite <- rbind.data.frame(metabolitesAllUnicite, listeTotale_2D_unicite[j,]) + commonPpm <- data.frame(ppm1=allMetabolitesList[j,1], ppm2=allMetabolitesList[j,2]) + commonPpmList <- rbind.data.frame(commonPpmList, commonPpm) + commonPpmList <- unique(commonPpmList) + metaboliteAdd <- data.frame(nom_metabolite=MetaboliteName) + metaboliteAddList <- rbind.data.frame(metaboliteAddList, metaboliteAdd) +# metabolite_ref <- data.frame(nom_metabolite=allMetabolitesList[j,3]) + commonMetabolitesList <- rbind.data.frame(data.frame(nom_metabolite=allMetabolitesList[j, 3]), metaboliteAddList) + commonMetabolitesPpmList <- cbind.data.frame(commonPpm, commonMetabolitesList) + commonMetabolitesPpmAllList1 <- rbind.data.frame(commonMetabolitesPpmAllList1, commonMetabolitesPpmList) + commonMetabolitesPpmAllList1 <- unique.data.frame(commonMetabolitesPpmAllList1) + } + } + commonMetabolitesPpmAllList <- rbind.data.frame(commonMetabolitesPpmAllList, commonMetabolitesPpmAllList1) + commonMetabolitesPpmAllList <- unique.data.frame(commonMetabolitesPpmAllList) + + #initialisation des data.frame + commonPpm <- data.frame() + metaboliteAdd <- data.frame() + metaboliteAddList <- data.frame() + metabolite_ref <- data.frame() + commonMetabolitesList <- data.frame() + commonMetabolitesPpmList <- data.frame() + commonMetabolitesPpmAllList1 <- data.frame() + } + + unicityAllList <- listeTotale_2D_unicite + if (nrow(listeTotale_2D_unicite)!=0 & nrow(metabolitesAllUnicite)!=0) + unicityAllList <- setdiff(listeTotale_2D_unicite, metabolitesAllUnicite) + + unicitynbCouplesRectif <- data.frame() + for (g in 1:nrow(unicityAllList)) + { + metaboliteUnicity <- (unicityAllList$Metabolite == unicityAllList$Metabolite[g]) + WhichMetaboliteUnicity <- which(metaboliteUnicity) + nb_occurence <- length(WhichMetaboliteUnicity) + unicitynbCouplesRectif <- rbind.data.frame(unicitynbCouplesRectif, nb_occurence) + } + names(unicitynbCouplesRectif) <- "NbCouplesAnnotes" + unicityAllList <- cbind.data.frame(unicityAllList, unicitynbCouplesRectif) + + unicityAllList <- cbind.data.frame(unicityAllList, score_unicite=unicityAllList$NbCouplesAnnotes/unicityAllList$theoricalLength) + unicityAllList <- unicityAllList[, -3] + unicityAllList <- unicityAllList[, -4] + +## unicityAllList <- filter(unicityAllList, unicityAllList$score_unicite > seuil_score) + unicityAllList <- unicityAllList[unicityAllList$score_unicite > seuil_score,] + + listeTotale_metabo <- data.frame() + if (nrow(commonPpmList) !=0) + { + for (o in 1:length(commonPpmList[, 1])) + { + tf6 <- (commonMetabolitesPpmAllList$ppm1 == commonPpmList[o,1] & commonMetabolitesPpmAllList$ppm2 == commonPpmList[o,2]) + w6 <- which(tf6) + + for (s in 1:length(w6)) + { + metaboliteAdd <- data.frame(nom_metabolite=commonMetabolitesPpmAllList[w6[s],3]) + commonMetabolitesList <- paste(commonMetabolitesList, metaboliteAdd[1,], sep = " ") + } + liste_metabo_ppm <- cbind.data.frame(ppm1=commonPpmList[o,1],ppm2=commonPpmList[o,2], commonMetabolitesList) + listeTotale_metabo <- rbind.data.frame(listeTotale_metabo, liste_metabo_ppm) + commonMetabolitesList <- data.frame() + } + } + + # Representation graphique + if (nom_sequence == "HSQC" | nom_sequence == "HMBC") + { + atome <- "13C" + indice_positif <- 1 + indice_negatif <- -10 + }else{ + atome <- "1H" + indice_positif <- 0.5 + indice_negatif <- -0.5 + } + + matriceComplexe <- matrixAnnotation + ppm1 <- as.numeric(matriceComplexe[,2]) + ppm2 <- as.numeric(matriceComplexe[,3]) + + if (unicite == "NO") + { + listeTotale_2D_a_utiliser <- allMetabolitesList + d1.ppm <- allMetabolitesList$ppm1 + d2.ppm <- allMetabolitesList$ppm2 + }else{ + listeTotale_2D_a_utiliser <- unicityAllList + d1.ppm <- listeTotale_2D_a_utiliser$ppm1 + d2.ppm <- listeTotale_2D_a_utiliser$ppm2 + } + + if (nrow(listeTotale_2D_a_utiliser) > 0) + { + ## Taches de correlations + # Matrice biologique + Annotations + maxX <- max(round(max(as.numeric(matriceComplexe[,2])))+0.5, round(max(as.numeric(matriceComplexe[,2])))) + maxY <- max(round(max(as.numeric(matriceComplexe[,3])))+indice_positif, round(max(as.numeric(matriceComplexe[,3])))) + probability.score <- as.factor(round(listeTotale_2D_a_utiliser[,4],2)) + lgr <- length(unique(probability.score)) + sp <- ggplot(matriceComplexe, aes(x=ppm1, y=ppm2)) + sp <- sp + geom_point(size=2) + scale_x_reverse(breaks=seq(maxX, 0, -0.5)) + + scale_y_reverse(breaks=seq(maxY, 0, indice_negatif)) + + xlab("1H chemical shift (ppm)") + ylab(paste(atome, " chemical shift (ppm)")) + ggtitle(nom_sequence) + + geom_text(data=listeTotale_2D_a_utiliser, aes(d1.ppm, d2.ppm, label=str_to_lower(substr(listeTotale_2D_a_utiliser[,3],1,3)), + col=probability.score), + size=4, hjust=0, nudge_x=0.02, vjust=0, nudge_y=0.2) + scale_colour_manual(values=viridis(lgr)) +## scale_color_colormap('Annotation', discrete=T, reverse=T) + print(sp) + } + + # Liste des résultats (couples pmm / metabolite / score) + liste ppms metabolites communs + if (unicite == "NO") + { + return(list(liste_resultat=allMetabolitesList, listing_ppm_commun=listeTotale_metabo)) + }else{ + return(list(liste_resultat_unicite=unicityAllList, listing_ppm_commun_affichage=listeTotale_metabo)) + } +} \ No newline at end of file