Mercurial > repos > marie-tremblay-metatoul > 2dnmrannotation
comparison nmr_annotation2d/annotationRmn2D.R @ 0:8035235e46c7 draft
Uploaded
author | marie-tremblay-metatoul |
---|---|
date | Mon, 23 Dec 2019 09:26:20 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8035235e46c7 |
---|---|
1 ########################################################################################################################################### | |
2 # ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE SEQUENCE RMN # | |
3 # matriceComplexe : data.frame liste couples ppm de la matrice a annoter # | |
4 # BdDStandards : objet contenant la base de donnees des composes standards # | |
5 # nom_séquence : nom sequence 2D a utiliser pour annotation ("JRES","COSY","TOCSY","HMBC","HSQC") # | |
6 # ppm1Tol : tolerance ppm axe abscisses # | |
7 # ppm2Tol : tolerance ppm axe ordonnees # | |
8 # nb_ligne_template : préciser le nombre total de ligne de la feuille de calcul à annoter # | |
9 ########################################################################################################################################### | |
10 annotationRmn2D <- function(matriceComplexe, BdDStandards, nom_sequence, ppm1Tol=0.01, ppm2Tol=0.01, | |
11 seuil=0, unicite="NO") | |
12 { | |
13 ## Longueur de la peak-list de la matrice a annoter | |
14 PeakListLength <- length(matriceComplexe[, 1]) | |
15 | |
16 ## Nombre de metabolites inclus dans BdD de composes standards | |
17 nbMetabolitesBdD <- length(BdDStandards) | |
18 matrixAnnotation <- data.frame() | |
19 allMetabolitesList <- data.frame() | |
20 seuil_score <- seuil | |
21 | |
22 ## Boucle sur les metabolites inclus dans BdD | |
23 for (i in 1:nbMetabolitesBdD) | |
24 { | |
25 ## Infos metabolite en cours | |
26 iMetabolite <- BdDStandards[[i]] | |
27 ppm1M <- iMetabolite[,1] | |
28 ppm2M <- iMetabolite[,2] | |
29 nbPeakMetabolite <- length(ppm1M) | |
30 MetaboliteName <- names(BdDStandards[i]) | |
31 ## print(MetaboliteName) | |
32 ## Initialisation | |
33 k <- 0 | |
34 presenceScore <- 0 | |
35 annotatedPpmRef <- data.frame() | |
36 annotatedPpmList <- data.frame() | |
37 annotatedPeakLength <- 0 | |
38 metabolites <- data.frame() | |
39 metabolitesList <- data.frame() | |
40 | |
41 ## Boucle sur les couples de pics de la matrice a annoter | |
42 for (p in 1:PeakListLength) | |
43 { | |
44 ppmAnnotationF1 <- as.numeric(matriceComplexe[p, 3]) | |
45 ppmAnnotationF2 <- as.numeric(matriceComplexe[p, 2]) | |
46 e <- simpleMessage("end of file") | |
47 tryCatch({ | |
48 if (!is.na(ppmAnnotationF1)) | |
49 { | |
50 matrixAnnotation <- unique.data.frame(rbind.data.frame(matrixAnnotation, matriceComplexe[p, ])) | |
51 } | |
52 # Recherche du couple de pics de la matrice la liste des couples du metabolite standard | |
53 metaboliteIn <- (ppm1M >= (ppmAnnotationF2-ppm1Tol) & ppm1M <= (ppmAnnotationF2+ppm1Tol) & | |
54 ppm2M >= (ppmAnnotationF1-ppm2Tol) & ppm2M <= (ppmAnnotationF1+ppm2Tol)) | |
55 WhichMetaboliteIn <- which(metaboliteIn) | |
56 # Si au moins un couple de la matrice a annoter dans liste couples metabolite standard | |
57 if (length(WhichMetaboliteIn) > 0) | |
58 { | |
59 for (a in 1:length(WhichMetaboliteIn)) | |
60 { | |
61 annotatedPpmList <- data.frame(ppm1=ppm1M[WhichMetaboliteIn[a]], ppm2=ppm2M[WhichMetaboliteIn[a]], theoricalLength=nbPeakMetabolite) | |
62 annotatedPpmRef <- rbind(annotatedPpmRef,annotatedPpmList) | |
63 } | |
64 } | |
65 }, error=function(e){cat ("End of file \n");}) | |
66 } | |
67 | |
68 # Au - 1 couple de ppm de la matrice complexe annote | |
69 if (nrow(annotatedPpmRef) >= 1) | |
70 { | |
71 ## Nombre couples annotes | |
72 annotatedPeakLength <- nrow(annotatedPpmRef) | |
73 | |
74 ## Recherche doublons | |
75 annotatedDoublons <- duplicated(annotatedPpmRef) | |
76 if (sum(duplicated(annotatedPpmRef)) > 0) | |
77 { | |
78 annotatedPeakLength <- nrow(annotatedPpmRef) - sum(duplicated(annotatedPpmRef)) | |
79 annotatedPpmRef <- annotatedPpmRef[-duplicated(annotatedPpmRef), ] | |
80 } | |
81 presenceScore <- annotatedPeakLength/nbPeakMetabolite | |
82 } | |
83 | |
84 ## Conservation metabolites dont score > seuil | |
85 if (presenceScore > seuil_score) | |
86 { | |
87 metabolites <- data.frame(Metabolite=MetaboliteName, score=presenceScore) | |
88 metabolitesList <- cbind.data.frame(annotatedPpmRef, metabolites) | |
89 allMetabolitesList <- rbind.data.frame(allMetabolitesList, metabolitesList) | |
90 } | |
91 } | |
92 | |
93 # Initialisation | |
94 commonPpm <- data.frame() | |
95 commonPpmList <- data.frame() | |
96 metaboliteAdd <- data.frame() | |
97 metaboliteAddList <- data.frame() | |
98 # metabolite_ref <- data.frame() | |
99 commonMetabolitesList <- data.frame() | |
100 commonMetabolitesPpmList <- data.frame() | |
101 commonMetabolitesPpmAllList1 <- data.frame() | |
102 commonMetabolitesPpmAllList <- data.frame() | |
103 listeTotale_2D_unicite <- allMetabolitesList[, 1:4] | |
104 allMetabolitesList <- allMetabolitesList[, -3] | |
105 metabolitesAllUnicite <- data.frame() | |
106 | |
107 ## Boucle sur tous couples annotes | |
108 for (j in 1:length(allMetabolitesList$ppm1)) | |
109 { | |
110 ## Boucle sur metabolites dans BdD composes standards | |
111 for (i in 1:nbMetabolitesBdD) | |
112 { | |
113 ppmMetaboliteBdD <- BdDStandards[[i]] | |
114 ppm1M <- ppmMetaboliteBdD[,1] | |
115 ppm2M <- ppmMetaboliteBdD[,2] | |
116 # Nombre de couples metabolite | |
117 nbPeakMetabolite <- length(ppm1M) | |
118 MetaboliteName <- names(BdDStandards[i]) | |
119 | |
120 metabolitesInAll <- (ppm1M >= (allMetabolitesList[j,1]-ppm1Tol) & ppm1M <= (allMetabolitesList[j,1]+ppm1Tol) & | |
121 ppm2M >= (allMetabolitesList[j,2]-ppm2Tol) & ppm2M <= (allMetabolitesList[j,2]+ppm2Tol)) | |
122 WhichMetabolitesInAll <- which(metabolitesInAll) | |
123 | |
124 if (MetaboliteName != allMetabolitesList[j, 3] & length(WhichMetabolitesInAll) > 0) | |
125 { | |
126 metabolitesAllUnicite <- rbind.data.frame(metabolitesAllUnicite, listeTotale_2D_unicite[j,]) | |
127 commonPpm <- data.frame(ppm1=allMetabolitesList[j,1], ppm2=allMetabolitesList[j,2]) | |
128 commonPpmList <- rbind.data.frame(commonPpmList, commonPpm) | |
129 commonPpmList <- unique(commonPpmList) | |
130 metaboliteAdd <- data.frame(nom_metabolite=MetaboliteName) | |
131 metaboliteAddList <- rbind.data.frame(metaboliteAddList, metaboliteAdd) | |
132 # metabolite_ref <- data.frame(nom_metabolite=allMetabolitesList[j,3]) | |
133 commonMetabolitesList <- rbind.data.frame(data.frame(nom_metabolite=allMetabolitesList[j, 3]), metaboliteAddList) | |
134 commonMetabolitesPpmList <- cbind.data.frame(commonPpm, commonMetabolitesList) | |
135 commonMetabolitesPpmAllList1 <- rbind.data.frame(commonMetabolitesPpmAllList1, commonMetabolitesPpmList) | |
136 commonMetabolitesPpmAllList1 <- unique.data.frame(commonMetabolitesPpmAllList1) | |
137 } | |
138 } | |
139 commonMetabolitesPpmAllList <- rbind.data.frame(commonMetabolitesPpmAllList, commonMetabolitesPpmAllList1) | |
140 commonMetabolitesPpmAllList <- unique.data.frame(commonMetabolitesPpmAllList) | |
141 | |
142 #initialisation des data.frame | |
143 commonPpm <- data.frame() | |
144 metaboliteAdd <- data.frame() | |
145 metaboliteAddList <- data.frame() | |
146 metabolite_ref <- data.frame() | |
147 commonMetabolitesList <- data.frame() | |
148 commonMetabolitesPpmList <- data.frame() | |
149 commonMetabolitesPpmAllList1 <- data.frame() | |
150 } | |
151 | |
152 unicityAllList <- listeTotale_2D_unicite | |
153 if (nrow(listeTotale_2D_unicite)!=0 & nrow(metabolitesAllUnicite)!=0) | |
154 unicityAllList <- setdiff(listeTotale_2D_unicite, metabolitesAllUnicite) | |
155 | |
156 unicitynbCouplesRectif <- data.frame() | |
157 for (g in 1:nrow(unicityAllList)) | |
158 { | |
159 metaboliteUnicity <- (unicityAllList$Metabolite == unicityAllList$Metabolite[g]) | |
160 WhichMetaboliteUnicity <- which(metaboliteUnicity) | |
161 nb_occurence <- length(WhichMetaboliteUnicity) | |
162 unicitynbCouplesRectif <- rbind.data.frame(unicitynbCouplesRectif, nb_occurence) | |
163 } | |
164 names(unicitynbCouplesRectif) <- "NbCouplesAnnotes" | |
165 unicityAllList <- cbind.data.frame(unicityAllList, unicitynbCouplesRectif) | |
166 | |
167 unicityAllList <- cbind.data.frame(unicityAllList, score_unicite=unicityAllList$NbCouplesAnnotes/unicityAllList$theoricalLength) | |
168 unicityAllList <- unicityAllList[, -3] | |
169 unicityAllList <- unicityAllList[, -4] | |
170 | |
171 ## unicityAllList <- filter(unicityAllList, unicityAllList$score_unicite > seuil_score) | |
172 unicityAllList <- unicityAllList[unicityAllList$score_unicite > seuil_score,] | |
173 | |
174 listeTotale_metabo <- data.frame() | |
175 if (nrow(commonPpmList) !=0) | |
176 { | |
177 for (o in 1:length(commonPpmList[, 1])) | |
178 { | |
179 tf6 <- (commonMetabolitesPpmAllList$ppm1 == commonPpmList[o,1] & commonMetabolitesPpmAllList$ppm2 == commonPpmList[o,2]) | |
180 w6 <- which(tf6) | |
181 | |
182 for (s in 1:length(w6)) | |
183 { | |
184 metaboliteAdd <- data.frame(nom_metabolite=commonMetabolitesPpmAllList[w6[s],3]) | |
185 commonMetabolitesList <- paste(commonMetabolitesList, metaboliteAdd[1,], sep = " ") | |
186 } | |
187 liste_metabo_ppm <- cbind.data.frame(ppm1=commonPpmList[o,1],ppm2=commonPpmList[o,2], commonMetabolitesList) | |
188 listeTotale_metabo <- rbind.data.frame(listeTotale_metabo, liste_metabo_ppm) | |
189 commonMetabolitesList <- data.frame() | |
190 } | |
191 } | |
192 | |
193 # Representation graphique | |
194 if (nom_sequence == "HSQC" | nom_sequence == "HMBC") | |
195 { | |
196 atome <- "13C" | |
197 indice_positif <- 1 | |
198 indice_negatif <- -10 | |
199 }else{ | |
200 atome <- "1H" | |
201 indice_positif <- 0.5 | |
202 indice_negatif <- -0.5 | |
203 } | |
204 | |
205 matriceComplexe <- matrixAnnotation | |
206 ppm1 <- as.numeric(matriceComplexe[,2]) | |
207 ppm2 <- as.numeric(matriceComplexe[,3]) | |
208 | |
209 if (unicite == "NO") | |
210 { | |
211 listeTotale_2D_a_utiliser <- allMetabolitesList | |
212 d1.ppm <- allMetabolitesList$ppm1 | |
213 d2.ppm <- allMetabolitesList$ppm2 | |
214 }else{ | |
215 listeTotale_2D_a_utiliser <- unicityAllList | |
216 d1.ppm <- listeTotale_2D_a_utiliser$ppm1 | |
217 d2.ppm <- listeTotale_2D_a_utiliser$ppm2 | |
218 } | |
219 | |
220 if (nrow(listeTotale_2D_a_utiliser) > 0) | |
221 { | |
222 ## Taches de correlations | |
223 # Matrice biologique + Annotations | |
224 maxX <- max(round(max(as.numeric(matriceComplexe[,2])))+0.5, round(max(as.numeric(matriceComplexe[,2])))) | |
225 maxY <- max(round(max(as.numeric(matriceComplexe[,3])))+indice_positif, round(max(as.numeric(matriceComplexe[,3])))) | |
226 probability.score <- as.factor(round(listeTotale_2D_a_utiliser[,4],2)) | |
227 lgr <- length(unique(probability.score)) | |
228 sp <- ggplot(matriceComplexe, aes(x=ppm1, y=ppm2)) | |
229 sp <- sp + geom_point(size=2) + scale_x_reverse(breaks=seq(maxX, 0, -0.5)) + | |
230 scale_y_reverse(breaks=seq(maxY, 0, indice_negatif)) + | |
231 xlab("1H chemical shift (ppm)") + ylab(paste(atome, " chemical shift (ppm)")) + ggtitle(nom_sequence) + | |
232 geom_text(data=listeTotale_2D_a_utiliser, aes(d1.ppm, d2.ppm, label=str_to_lower(substr(listeTotale_2D_a_utiliser[,3],1,3)), | |
233 col=probability.score), | |
234 size=4, hjust=0, nudge_x=0.02, vjust=0, nudge_y=0.2) + scale_colour_manual(values=viridis(lgr)) | |
235 ## scale_color_colormap('Annotation', discrete=T, reverse=T) | |
236 print(sp) | |
237 } | |
238 | |
239 # Liste des résultats (couples pmm / metabolite / score) + liste ppms metabolites communs | |
240 if (unicite == "NO") | |
241 { | |
242 return(list(liste_resultat=allMetabolitesList, listing_ppm_commun=listeTotale_metabo)) | |
243 }else{ | |
244 return(list(liste_resultat_unicite=unicityAllList, listing_ppm_commun_affichage=listeTotale_metabo)) | |
245 } | |
246 } |