0
|
1 ###########################################################################################################################################
|
|
2 # ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE SEQUENCE RMN #
|
|
3 # matriceComplexe : data.frame liste couples ppm de la matrice a annoter #
|
|
4 # BdDStandards : objet contenant la base de donnees des composes standards #
|
|
5 # nom_séquence : nom sequence 2D a utiliser pour annotation ("JRES","COSY","TOCSY","HMBC","HSQC") #
|
|
6 # ppm1Tol : tolerance ppm axe abscisses #
|
|
7 # ppm2Tol : tolerance ppm axe ordonnees #
|
|
8 # nb_ligne_template : préciser le nombre total de ligne de la feuille de calcul à annoter #
|
|
9 ###########################################################################################################################################
|
|
10 annotationRmn2D <- function(matriceComplexe, BdDStandards, nom_sequence, ppm1Tol=0.01, ppm2Tol=0.01,
|
|
11 seuil=0, unicite="NO")
|
|
12 {
|
|
13 ## Longueur de la peak-list de la matrice a annoter
|
|
14 PeakListLength <- length(matriceComplexe[, 1])
|
|
15
|
|
16 ## Nombre de metabolites inclus dans BdD de composes standards
|
|
17 nbMetabolitesBdD <- length(BdDStandards)
|
|
18 matrixAnnotation <- data.frame()
|
|
19 allMetabolitesList <- data.frame()
|
|
20 seuil_score <- seuil
|
|
21
|
|
22 ## Boucle sur les metabolites inclus dans BdD
|
|
23 for (i in 1:nbMetabolitesBdD)
|
|
24 {
|
|
25 ## Infos metabolite en cours
|
|
26 iMetabolite <- BdDStandards[[i]]
|
|
27 ppm1M <- iMetabolite[,1]
|
|
28 ppm2M <- iMetabolite[,2]
|
|
29 nbPeakMetabolite <- length(ppm1M)
|
|
30 MetaboliteName <- names(BdDStandards[i])
|
|
31 ## print(MetaboliteName)
|
|
32 ## Initialisation
|
|
33 k <- 0
|
|
34 presenceScore <- 0
|
|
35 annotatedPpmRef <- data.frame()
|
|
36 annotatedPpmList <- data.frame()
|
|
37 annotatedPeakLength <- 0
|
|
38 metabolites <- data.frame()
|
|
39 metabolitesList <- data.frame()
|
|
40
|
|
41 ## Boucle sur les couples de pics de la matrice a annoter
|
|
42 for (p in 1:PeakListLength)
|
|
43 {
|
|
44 ppmAnnotationF1 <- as.numeric(matriceComplexe[p, 3])
|
|
45 ppmAnnotationF2 <- as.numeric(matriceComplexe[p, 2])
|
|
46 e <- simpleMessage("end of file")
|
|
47 tryCatch({
|
|
48 if (!is.na(ppmAnnotationF1))
|
|
49 {
|
|
50 matrixAnnotation <- unique.data.frame(rbind.data.frame(matrixAnnotation, matriceComplexe[p, ]))
|
|
51 }
|
|
52 # Recherche du couple de pics de la matrice la liste des couples du metabolite standard
|
|
53 metaboliteIn <- (ppm1M >= (ppmAnnotationF2-ppm1Tol) & ppm1M <= (ppmAnnotationF2+ppm1Tol) &
|
|
54 ppm2M >= (ppmAnnotationF1-ppm2Tol) & ppm2M <= (ppmAnnotationF1+ppm2Tol))
|
|
55 WhichMetaboliteIn <- which(metaboliteIn)
|
|
56 # Si au moins un couple de la matrice a annoter dans liste couples metabolite standard
|
|
57 if (length(WhichMetaboliteIn) > 0)
|
|
58 {
|
|
59 for (a in 1:length(WhichMetaboliteIn))
|
|
60 {
|
|
61 annotatedPpmList <- data.frame(ppm1=ppm1M[WhichMetaboliteIn[a]], ppm2=ppm2M[WhichMetaboliteIn[a]], theoricalLength=nbPeakMetabolite)
|
|
62 annotatedPpmRef <- rbind(annotatedPpmRef,annotatedPpmList)
|
|
63 }
|
|
64 }
|
|
65 }, error=function(e){cat ("End of file \n");})
|
|
66 }
|
|
67
|
|
68 # Au - 1 couple de ppm de la matrice complexe annote
|
|
69 if (nrow(annotatedPpmRef) >= 1)
|
|
70 {
|
|
71 ## Nombre couples annotes
|
|
72 annotatedPeakLength <- nrow(annotatedPpmRef)
|
|
73
|
|
74 ## Recherche doublons
|
|
75 annotatedDoublons <- duplicated(annotatedPpmRef)
|
|
76 if (sum(duplicated(annotatedPpmRef)) > 0)
|
|
77 {
|
|
78 annotatedPeakLength <- nrow(annotatedPpmRef) - sum(duplicated(annotatedPpmRef))
|
|
79 annotatedPpmRef <- annotatedPpmRef[-duplicated(annotatedPpmRef), ]
|
|
80 }
|
|
81 presenceScore <- annotatedPeakLength/nbPeakMetabolite
|
|
82 }
|
|
83
|
|
84 ## Conservation metabolites dont score > seuil
|
|
85 if (presenceScore > seuil_score)
|
|
86 {
|
|
87 metabolites <- data.frame(Metabolite=MetaboliteName, score=presenceScore)
|
|
88 metabolitesList <- cbind.data.frame(annotatedPpmRef, metabolites)
|
|
89 allMetabolitesList <- rbind.data.frame(allMetabolitesList, metabolitesList)
|
|
90 }
|
|
91 }
|
|
92
|
|
93 # Initialisation
|
|
94 commonPpm <- data.frame()
|
|
95 commonPpmList <- data.frame()
|
|
96 metaboliteAdd <- data.frame()
|
|
97 metaboliteAddList <- data.frame()
|
|
98 # metabolite_ref <- data.frame()
|
|
99 commonMetabolitesList <- data.frame()
|
|
100 commonMetabolitesPpmList <- data.frame()
|
|
101 commonMetabolitesPpmAllList1 <- data.frame()
|
|
102 commonMetabolitesPpmAllList <- data.frame()
|
|
103 listeTotale_2D_unicite <- allMetabolitesList[, 1:4]
|
|
104 allMetabolitesList <- allMetabolitesList[, -3]
|
|
105 metabolitesAllUnicite <- data.frame()
|
|
106
|
|
107 ## Boucle sur tous couples annotes
|
|
108 for (j in 1:length(allMetabolitesList$ppm1))
|
|
109 {
|
|
110 ## Boucle sur metabolites dans BdD composes standards
|
|
111 for (i in 1:nbMetabolitesBdD)
|
|
112 {
|
|
113 ppmMetaboliteBdD <- BdDStandards[[i]]
|
|
114 ppm1M <- ppmMetaboliteBdD[,1]
|
|
115 ppm2M <- ppmMetaboliteBdD[,2]
|
|
116 # Nombre de couples metabolite
|
|
117 nbPeakMetabolite <- length(ppm1M)
|
|
118 MetaboliteName <- names(BdDStandards[i])
|
|
119
|
|
120 metabolitesInAll <- (ppm1M >= (allMetabolitesList[j,1]-ppm1Tol) & ppm1M <= (allMetabolitesList[j,1]+ppm1Tol) &
|
|
121 ppm2M >= (allMetabolitesList[j,2]-ppm2Tol) & ppm2M <= (allMetabolitesList[j,2]+ppm2Tol))
|
|
122 WhichMetabolitesInAll <- which(metabolitesInAll)
|
|
123
|
|
124 if (MetaboliteName != allMetabolitesList[j, 3] & length(WhichMetabolitesInAll) > 0)
|
|
125 {
|
|
126 metabolitesAllUnicite <- rbind.data.frame(metabolitesAllUnicite, listeTotale_2D_unicite[j,])
|
|
127 commonPpm <- data.frame(ppm1=allMetabolitesList[j,1], ppm2=allMetabolitesList[j,2])
|
|
128 commonPpmList <- rbind.data.frame(commonPpmList, commonPpm)
|
|
129 commonPpmList <- unique(commonPpmList)
|
|
130 metaboliteAdd <- data.frame(nom_metabolite=MetaboliteName)
|
|
131 metaboliteAddList <- rbind.data.frame(metaboliteAddList, metaboliteAdd)
|
|
132 # metabolite_ref <- data.frame(nom_metabolite=allMetabolitesList[j,3])
|
|
133 commonMetabolitesList <- rbind.data.frame(data.frame(nom_metabolite=allMetabolitesList[j, 3]), metaboliteAddList)
|
|
134 commonMetabolitesPpmList <- cbind.data.frame(commonPpm, commonMetabolitesList)
|
|
135 commonMetabolitesPpmAllList1 <- rbind.data.frame(commonMetabolitesPpmAllList1, commonMetabolitesPpmList)
|
|
136 commonMetabolitesPpmAllList1 <- unique.data.frame(commonMetabolitesPpmAllList1)
|
|
137 }
|
|
138 }
|
|
139 commonMetabolitesPpmAllList <- rbind.data.frame(commonMetabolitesPpmAllList, commonMetabolitesPpmAllList1)
|
|
140 commonMetabolitesPpmAllList <- unique.data.frame(commonMetabolitesPpmAllList)
|
|
141
|
|
142 #initialisation des data.frame
|
|
143 commonPpm <- data.frame()
|
|
144 metaboliteAdd <- data.frame()
|
|
145 metaboliteAddList <- data.frame()
|
|
146 metabolite_ref <- data.frame()
|
|
147 commonMetabolitesList <- data.frame()
|
|
148 commonMetabolitesPpmList <- data.frame()
|
|
149 commonMetabolitesPpmAllList1 <- data.frame()
|
|
150 }
|
|
151
|
|
152 unicityAllList <- listeTotale_2D_unicite
|
|
153 if (nrow(listeTotale_2D_unicite)!=0 & nrow(metabolitesAllUnicite)!=0)
|
|
154 unicityAllList <- setdiff(listeTotale_2D_unicite, metabolitesAllUnicite)
|
|
155
|
|
156 unicitynbCouplesRectif <- data.frame()
|
|
157 for (g in 1:nrow(unicityAllList))
|
|
158 {
|
|
159 metaboliteUnicity <- (unicityAllList$Metabolite == unicityAllList$Metabolite[g])
|
|
160 WhichMetaboliteUnicity <- which(metaboliteUnicity)
|
|
161 nb_occurence <- length(WhichMetaboliteUnicity)
|
|
162 unicitynbCouplesRectif <- rbind.data.frame(unicitynbCouplesRectif, nb_occurence)
|
|
163 }
|
|
164 names(unicitynbCouplesRectif) <- "NbCouplesAnnotes"
|
|
165 unicityAllList <- cbind.data.frame(unicityAllList, unicitynbCouplesRectif)
|
|
166
|
|
167 unicityAllList <- cbind.data.frame(unicityAllList, score_unicite=unicityAllList$NbCouplesAnnotes/unicityAllList$theoricalLength)
|
|
168 unicityAllList <- unicityAllList[, -3]
|
|
169 unicityAllList <- unicityAllList[, -4]
|
|
170
|
|
171 ## unicityAllList <- filter(unicityAllList, unicityAllList$score_unicite > seuil_score)
|
|
172 unicityAllList <- unicityAllList[unicityAllList$score_unicite > seuil_score,]
|
|
173
|
|
174 listeTotale_metabo <- data.frame()
|
|
175 if (nrow(commonPpmList) !=0)
|
|
176 {
|
|
177 for (o in 1:length(commonPpmList[, 1]))
|
|
178 {
|
|
179 tf6 <- (commonMetabolitesPpmAllList$ppm1 == commonPpmList[o,1] & commonMetabolitesPpmAllList$ppm2 == commonPpmList[o,2])
|
|
180 w6 <- which(tf6)
|
|
181
|
|
182 for (s in 1:length(w6))
|
|
183 {
|
|
184 metaboliteAdd <- data.frame(nom_metabolite=commonMetabolitesPpmAllList[w6[s],3])
|
|
185 commonMetabolitesList <- paste(commonMetabolitesList, metaboliteAdd[1,], sep = " ")
|
|
186 }
|
|
187 liste_metabo_ppm <- cbind.data.frame(ppm1=commonPpmList[o,1],ppm2=commonPpmList[o,2], commonMetabolitesList)
|
|
188 listeTotale_metabo <- rbind.data.frame(listeTotale_metabo, liste_metabo_ppm)
|
|
189 commonMetabolitesList <- data.frame()
|
|
190 }
|
|
191 }
|
|
192
|
|
193 # Representation graphique
|
|
194 if (nom_sequence == "HSQC" | nom_sequence == "HMBC")
|
|
195 {
|
|
196 atome <- "13C"
|
|
197 indice_positif <- 1
|
|
198 indice_negatif <- -10
|
|
199 }else{
|
|
200 atome <- "1H"
|
|
201 indice_positif <- 0.5
|
|
202 indice_negatif <- -0.5
|
|
203 }
|
|
204
|
|
205 matriceComplexe <- matrixAnnotation
|
|
206 ppm1 <- as.numeric(matriceComplexe[,2])
|
|
207 ppm2 <- as.numeric(matriceComplexe[,3])
|
|
208
|
|
209 if (unicite == "NO")
|
|
210 {
|
|
211 listeTotale_2D_a_utiliser <- allMetabolitesList
|
|
212 d1.ppm <- allMetabolitesList$ppm1
|
|
213 d2.ppm <- allMetabolitesList$ppm2
|
|
214 }else{
|
|
215 listeTotale_2D_a_utiliser <- unicityAllList
|
|
216 d1.ppm <- listeTotale_2D_a_utiliser$ppm1
|
|
217 d2.ppm <- listeTotale_2D_a_utiliser$ppm2
|
|
218 }
|
|
219
|
|
220 if (nrow(listeTotale_2D_a_utiliser) > 0)
|
|
221 {
|
|
222 ## Taches de correlations
|
|
223 # Matrice biologique + Annotations
|
|
224 maxX <- max(round(max(as.numeric(matriceComplexe[,2])))+0.5, round(max(as.numeric(matriceComplexe[,2]))))
|
|
225 maxY <- max(round(max(as.numeric(matriceComplexe[,3])))+indice_positif, round(max(as.numeric(matriceComplexe[,3]))))
|
|
226 probability.score <- as.factor(round(listeTotale_2D_a_utiliser[,4],2))
|
|
227 lgr <- length(unique(probability.score))
|
|
228 sp <- ggplot(matriceComplexe, aes(x=ppm1, y=ppm2))
|
|
229 sp <- sp + geom_point(size=2) + scale_x_reverse(breaks=seq(maxX, 0, -0.5)) +
|
|
230 scale_y_reverse(breaks=seq(maxY, 0, indice_negatif)) +
|
|
231 xlab("1H chemical shift (ppm)") + ylab(paste(atome, " chemical shift (ppm)")) + ggtitle(nom_sequence) +
|
|
232 geom_text(data=listeTotale_2D_a_utiliser, aes(d1.ppm, d2.ppm, label=str_to_lower(substr(listeTotale_2D_a_utiliser[,3],1,3)),
|
|
233 col=probability.score),
|
|
234 size=4, hjust=0, nudge_x=0.02, vjust=0, nudge_y=0.2) + scale_colour_manual(values=viridis(lgr))
|
|
235 ## scale_color_colormap('Annotation', discrete=T, reverse=T)
|
|
236 print(sp)
|
|
237 }
|
|
238
|
|
239 # Liste des résultats (couples pmm / metabolite / score) + liste ppms metabolites communs
|
|
240 if (unicite == "NO")
|
|
241 {
|
|
242 return(list(liste_resultat=allMetabolitesList, listing_ppm_commun=listeTotale_metabo))
|
|
243 }else{
|
|
244 return(list(liste_resultat_unicite=unicityAllList, listing_ppm_commun_affichage=listeTotale_metabo))
|
|
245 }
|
|
246 } |