Mercurial > repos > marie-tremblay-metatoul > 2dnmrannotation
comparison nmr_annotation2d/annotationRmn2DGlobale.R @ 0:8035235e46c7 draft
Uploaded
author | marie-tremblay-metatoul |
---|---|
date | Mon, 23 Dec 2019 09:26:20 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8035235e46c7 |
---|---|
1 ########################################################################################################################################### | |
2 # ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE (OU PLUSIEURS) SEQUENCE(s) RMN # | |
3 # template : dataframe contenant la liste des couples de deplacements chimiques de la matrice complexe a annoter # | |
4 # cosy : 1 si sequence a utiliser / 0 sinon # | |
5 # hmbc : 1 si sequence a utiliser / 0 sinon # | |
6 # hsqc : 1 si sequence a utiliser / 0 sinon # | |
7 # jres : 1 si sequence a utiliser / 0 sinon # | |
8 # tocsy : 1 si sequence a utiliser / 0 sinon # | |
9 # tolPpm1 : tolerance autorisee autour de la valeur1 du couple de deplacements chimiques # | |
10 # tolPpm2HJRes : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si H dans dimension 2 # | |
11 # tolPpm2C : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si C dans dimension 2 # | |
12 # seuil : valeur du score de presence en deça de laquelle les metabolites annotes ne sont pas retenus # | |
13 # unicite : boolean pour ne retenir que les ... # | |
14 ########################################################################################################################################### | |
## Mean of `x` with missing values ignored.
## Thin wrapper around mean(x, na.rm=TRUE), convenient as a FUN argument for apply-style calls.
mean.rmNa <- function(x) mean(x, na.rm=TRUE)
20 | |
## Annotate a complex mixture from one or several 2D NMR sequences and combine the per-sequence scores.
##
## Arguments
##   template     : workbook handed to read.xlsx(); one sheet per sequence ("COSY", "HMBC", "HSQC", "JRES",
##                  "TOCSY"), read from row 2, first three columns. Rows whose `peak.index` is "x" are discarded.
##   tolPpm1      : tolerance forwarded as ppm1Tol for every sequence (and as ppm2Tol for COSY and TOCSY).
##   tolPpm2HJRes : tolerance forwarded as ppm2Tol for JRES.
##   tolPpm2C     : tolerance forwarded as ppm2Tol for HMBC and HSQC.
##   cosy, hmbc, hsqc, jres, tocsy : 1 to run the corresponding sequence, any other value to skip it.
##   seuil        : score threshold; forwarded to annotationRmn2D() and applied (strictly greater than) to the
##                  average score when at least two sequences are combined.
##   unicite      : forwarded unchanged to annotationRmn2D().
##
## Value
##   A list with elements COSY, HMBC, HSQC, JRES, TOCSY (the annotationRmn2D() result of each sequence that was
##   run, an empty data.frame otherwise) and `combination` (one row per metabolite with one score column per
##   sequence plus `averageScore`, restricted to averageScore > seuil; an empty data.frame when fewer than two
##   sequences were run, because in that case the threshold was already applied inside annotationRmn2D()).
annotationRmn2DGlobale <- function(template, tolPpm1=0.01, tolPpm2HJRes=0.002, tolPpm2C=0.5, cosy=1, hmbc=1, hsqc=1, jres=1, tocsy=1,
                                   seuil, unicite="NO")
{
  ## NOTE(review): this changes a session-wide option and is not restored on exit. Left as in the original
  ## because callers may print the results afterwards and rely on the raised limit -- confirm before changing.
  options(max.print=999999999)

  ## The threshold is forwarded to every per-sequence annotation, whatever the number of sequences selected
  ## (an earlier variant only applied it when a single sequence was used).
  seuilPls2D <- seuil

  ## Read one sheet of the template, annotate it against its reference database and build the score table.
  ## `bddReference` is only evaluated here, so the database of a sequence that is not run is never looked up.
  annoterSequence <- function(nomSequence, bddReference, tolPpm2)
  {
    matrice <- read.xlsx(template, sheet=nomSequence, startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA")
    ## Keep rows with a missing peak.index: a bare `!= "x"` yields NA for them, and an NA row index would
    ## replace the whole row by NAs instead of keeping it.
    aConserver <- is.na(matrice$peak.index) | matrice$peak.index != "x"
    matrice <- matrice[aConserver, ]

    annotation <- annotationRmn2D(matrice, bddReference, nomSequence, ppm1Tol=tolPpm1, ppm2Tol=tolPpm2, seuil=seuilPls2D,
                                  unicite=unicite)

    ## One row per (metabolite, score) pair, metabolite names lower-cased so that sequences can be merged.
    scores <- data.frame(Metabolite=str_to_lower(annotation$liste_resultat$Metabolite), score=annotation$liste_resultat$score)
    names(scores)[names(scores) == "score"] <- paste0("score.", nomSequence)

    list(annotation=annotation, scores=unique(scores))
  }

  ## Run the selected sequences, always in the order COSY, HMBC, HSQC, JRES, TOCSY.
  resultats <- list()
  if (cosy == 1)
    resultats[["COSY"]] <- annoterSequence("COSY", BdDReference_COSY, tolPpm1)
  if (hmbc == 1)
    resultats[["HMBC"]] <- annoterSequence("HMBC", BdDReference_HMBC, tolPpm2C)
  if (hsqc == 1)
    resultats[["HSQC"]] <- annoterSequence("HSQC", BdDReference_HSQC, tolPpm2C)
  if (jres == 1)
    resultats[["JRES"]] <- annoterSequence("JRES", BdDReference_JRES, tolPpm2HJRes)
  if (tocsy == 1)
    resultats[["TOCSY"]] <- annoterSequence("TOCSY", BdDReference_TOCSY, tolPpm1)

  ## Annotation of a sequence, or an empty data.frame when that sequence was not run.
  annotationDe <- function(nomSequence)
  {
    if (is.null(resultats[[nomSequence]])) data.frame() else resultats[[nomSequence]]$annotation
  }

  ## Score tables of the sequences that were actually run.
  data2D <- lapply(unname(resultats), function(resultat) resultat$scores)

  ## With a single sequence the threshold has already been applied inside annotationRmn2D(), so the
  ## combination is only computed from two sequences upwards.
  combinaison <- data.frame()
  if (length(data2D) >= 2)
  {
    ## Full outer join of the per-sequence scores on the metabolite name.
    fusion <- Reduce(function(gauche, droite) merge(gauche, droite, by="Metabolite", all=TRUE), data2D)

    ## Average score per metabolite, ignoring the sequences in which it was not found.
    scoreMoyen <- apply(as.matrix(fusion[, -1, drop=FALSE]), 1, mean, na.rm=TRUE)
    fusion <- cbind.data.frame(fusion, averageScore=scoreMoyen)

    ## Keep metabolites whose average score is strictly above the threshold. which() drops rows whose
    ## average is NA/NaN instead of turning them into all-NA rows.
    combinaison <- fusion[which(fusion$averageScore > seuil), ]
  }

  return(list(COSY=annotationDe("COSY"), HMBC=annotationDe("HMBC"), HSQC=annotationDe("HSQC"), JRES=annotationDe("JRES"),
              TOCSY=annotationDe("TOCSY"), combination=combinaison))
}