Mercurial > repos > marie-tremblay-metatoul > 2dnmrannotation
comparison annotationRmn2DGlobale.R @ 4:cf11fa0c47c8 draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit b5f7f56b5ffc3c900236c077f72b321df20647be
author | workflow4metabolomics |
---|---|
date | Thu, 23 Jan 2025 15:28:44 +0000 |
parents | 546c7ccd2ed4 |
children |
comparison
equal
deleted
inserted
replaced
3:546c7ccd2ed4 | 4:cf11fa0c47c8 |
---|---|
12 # seuil : valeur du score de presence en dela de laquelle les metabolites annotes ne sont pas retenus # | 12 # seuil : valeur du score de presence en dela de laquelle les metabolites annotes ne sont pas retenus # |
13 # unicite : boolean pour ne retenir que les ... # | 13 # unicite : boolean pour ne retenir que les ... # |
14 ################################################################################################### | 14 ################################################################################################### |
15 ## CALCUL MOYENNE SANS VALEUR(S) MANQUANTE(S) | 15 ## CALCUL MOYENNE SANS VALEUR(S) MANQUANTE(S) |
16 mean.rmNa <- function(x) { | 16 mean.rmNa <- function(x) { |
17 mean(x, na.rm = TRUE) | 17 mean(x, na.rm = TRUE) |
18 } | 18 } |
19 | 19 |
20 annotationRmn2DGlobale <- function(template, tolPpm1 = 0.01, tolPpm2HJRes = 0.002, tolPpm2C = 0.5, cosy = 1, hmbc = 1, hsqc = 1, jres = 1, tocsy = 1, seuil, unicite = "NO") { | 20 annotationRmn2DGlobale <- function(template, tolPpm1 = 0.01, tolPpm2HJRes = 0.002, tolPpm2C = 0.5, cosy = 1, hmbc = 1, hsqc = 1, jres = 1, tocsy = 1, seuil, unicite = "NO") { |
21 ## Initialisation | 21 ## Initialisation |
22 options(max.print = 999999999) | 22 options(max.print = 999999999) |
23 annotationCOSY <- data.frame() | 23 annotationCOSY <- data.frame() |
24 annotationHMBC <- data.frame() | 24 annotationHMBC <- data.frame() |
25 annotationHSQC <- data.frame() | 25 annotationHSQC <- data.frame() |
26 annotationJRES <- data.frame() | 26 annotationJRES <- data.frame() |
27 annotationTOCSY <- data.frame() | 27 annotationTOCSY <- data.frame() |
28 | 28 |
29 dataCOSY <- "NA" | 29 dataCOSY <- "NA" |
30 dataHMBC <- "NA" | 30 dataHMBC <- "NA" |
31 dataHSQC <- "NA" | 31 dataHSQC <- "NA" |
32 dataJRES <- "NA" | 32 dataJRES <- "NA" |
33 dataTOCSY <- "NA" | 33 dataTOCSY <- "NA" |
34 | 34 |
35 ## Application seuil seulement si annotation avec 1 seule sequence | 35 ## Application seuil seulement si annotation avec 1 seule sequence |
36 seuilPls2D <- seuil | 36 seuilPls2D <- seuil |
37 | 37 |
38 if (cosy == 1) { | 38 if (cosy == 1) { |
39 matrice.cosy <- read.xlsx(template, sheet = "COSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") | 39 matrice.cosy <- read.xlsx(template, sheet = "COSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") |
40 matrice.cosy <- matrice.cosy[matrice.cosy$peak.index != "x", ] | 40 matrice.cosy <- matrice.cosy[matrice.cosy$peak.index != "x", ] |
41 annotationCOSY <- annotationRmn2D(matrice.cosy, BdDReference_COSY, "COSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) | 41 annotationCOSY <- annotationRmn2D(matrice.cosy, BdDReference_COSY, "COSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) |
42 dataCOSY <- data.frame(Metabolite = str_to_lower(annotationCOSY$liste_resultat$Metabolite), score.COSY = annotationCOSY$liste_resultat$score) | 42 dataCOSY <- data.frame(Metabolite = str_to_lower(annotationCOSY$liste_resultat$Metabolite), score.COSY = annotationCOSY$liste_resultat$score) |
43 dataCOSY <- unique.data.frame(dataCOSY) | 43 dataCOSY <- unique.data.frame(dataCOSY) |
44 } | 44 } |
45 | 45 |
46 if (hmbc == 1) { | 46 if (hmbc == 1) { |
47 matrice.hmbc <- read.xlsx(template, sheet = "HMBC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") | 47 matrice.hmbc <- read.xlsx(template, sheet = "HMBC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") |
48 matrice.hmbc <- matrice.hmbc[matrice.hmbc$peak.index != "x", ] | 48 matrice.hmbc <- matrice.hmbc[matrice.hmbc$peak.index != "x", ] |
49 annotationHMBC <- annotationRmn2D(matrice.hmbc, BdDReference_HMBC, "HMBC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) | 49 annotationHMBC <- annotationRmn2D(matrice.hmbc, BdDReference_HMBC, "HMBC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) |
50 dataHMBC <- data.frame(Metabolite = str_to_lower(annotationHMBC$liste_resultat$Metabolite), score.HMBC = annotationHMBC$liste_resultat$score) | 50 dataHMBC <- data.frame(Metabolite = str_to_lower(annotationHMBC$liste_resultat$Metabolite), score.HMBC = annotationHMBC$liste_resultat$score) |
51 dataHMBC <- unique.data.frame(dataHMBC) | 51 dataHMBC <- unique.data.frame(dataHMBC) |
52 } | 52 } |
53 | 53 |
54 if (hsqc == 1) { | 54 if (hsqc == 1) { |
55 matrice.hsqc <- read.xlsx(template, sheet = "HSQC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") | 55 matrice.hsqc <- read.xlsx(template, sheet = "HSQC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") |
56 matrice.hsqc <- matrice.hsqc[matrice.hsqc$peak.index != "x", ] | 56 matrice.hsqc <- matrice.hsqc[matrice.hsqc$peak.index != "x", ] |
57 annotationHSQC <- annotationRmn2D(matrice.hsqc, BdDReference_HSQC, "HSQC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) | 57 annotationHSQC <- annotationRmn2D(matrice.hsqc, BdDReference_HSQC, "HSQC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) |
58 dataHSQC <- data.frame(Metabolite = str_to_lower(annotationHSQC$liste_resultat$Metabolite), score.HSQC = annotationHSQC$liste_resultat$score) | 58 dataHSQC <- data.frame(Metabolite = str_to_lower(annotationHSQC$liste_resultat$Metabolite), score.HSQC = annotationHSQC$liste_resultat$score) |
59 dataHSQC <- unique.data.frame(dataHSQC) | 59 dataHSQC <- unique.data.frame(dataHSQC) |
60 } | 60 } |
61 | 61 |
62 if (jres == 1) { | 62 if (jres == 1) { |
63 matrice.jres <- read.xlsx(template, sheet = "JRES", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") | 63 matrice.jres <- read.xlsx(template, sheet = "JRES", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") |
64 matrice.jres <- matrice.jres[matrice.jres$peak.index != "x", ] | 64 matrice.jres <- matrice.jres[matrice.jres$peak.index != "x", ] |
65 annotationJRES <- annotationRmn2D(matrice.jres, BdDReference_JRES, "JRES", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2HJRes, seuil = seuilPls2D, unicite = unicite) | 65 annotationJRES <- annotationRmn2D(matrice.jres, BdDReference_JRES, "JRES", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2HJRes, seuil = seuilPls2D, unicite = unicite) |
66 dataJRES <- data.frame(Metabolite = str_to_lower(annotationJRES$liste_resultat$Metabolite), score.JRES = annotationJRES$liste_resultat$score) | 66 dataJRES <- data.frame(Metabolite = str_to_lower(annotationJRES$liste_resultat$Metabolite), score.JRES = annotationJRES$liste_resultat$score) |
67 dataJRES <- unique.data.frame(dataJRES) | 67 dataJRES <- unique.data.frame(dataJRES) |
68 } | 68 } |
69 | 69 |
70 if (tocsy == 1) { | 70 if (tocsy == 1) { |
71 matrice.tocsy <- read.xlsx(template, sheet = "TOCSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") | 71 matrice.tocsy <- read.xlsx(template, sheet = "TOCSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") |
72 matrice.tocsy <- matrice.tocsy[matrice.tocsy$peak.index != "x", ] | 72 matrice.tocsy <- matrice.tocsy[matrice.tocsy$peak.index != "x", ] |
73 annotationTOCSY <- annotationRmn2D(matrice.tocsy, BdDReference_TOCSY, "TOCSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) | 73 annotationTOCSY <- annotationRmn2D(matrice.tocsy, BdDReference_TOCSY, "TOCSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) |
74 dataTOCSY <- data.frame(Metabolite = str_to_lower(annotationTOCSY$liste_resultat$Metabolite), score.TOCSY = annotationTOCSY$liste_resultat$score) | 74 dataTOCSY <- data.frame(Metabolite = str_to_lower(annotationTOCSY$liste_resultat$Metabolite), score.TOCSY = annotationTOCSY$liste_resultat$score) |
75 dataTOCSY <- unique.data.frame(dataTOCSY) | 75 dataTOCSY <- unique.data.frame(dataTOCSY) |
76 } | 76 } |
77 | 77 |
78 seqCombiMeanScoreSeuil <- data.frame() | 78 seqCombiMeanScoreSeuil <- data.frame() |
79 seqCombiMeanScoreSeuilFiltre <- data.frame() | 79 seqCombiMeanScoreSeuilFiltre <- data.frame() |
80 | 80 |
81 ## CONCATENATION RESULTATS DIFFERENTES SEQUENCES | 81 ## CONCATENATION RESULTATS DIFFERENTES SEQUENCES |
82 data2D <- list(dataCOSY, dataHMBC, dataHSQC, dataJRES, dataTOCSY) | 82 data2D <- list(dataCOSY, dataHMBC, dataHSQC, dataJRES, dataTOCSY) |
83 whichSequenceNaN <- which((data2D != "NA")) | 83 whichSequenceNaN <- which((data2D != "NA")) |
84 data2D <- data2D[whichSequenceNaN] | 84 data2D <- data2D[whichSequenceNaN] |
85 sequencesCombination <- data.frame(data2D[1]) | 85 sequencesCombination <- data.frame(data2D[1]) |
86 seqCombiMeanScore <- sequencesCombination | 86 seqCombiMeanScore <- sequencesCombination |
87 | 87 |
88 ## Si une seule sequence et seuil sur score = filtre applique dans la fonction annotationRmn2D | 88 ## Si une seule sequence et seuil sur score = filtre applique dans la fonction annotationRmn2D |
89 if (length(data2D) >= 2) { | 89 if (length(data2D) >= 2) { |
90 ## CONCATENATION SCORE PAR SEQUENCE | 90 ## CONCATENATION SCORE PAR SEQUENCE |
91 for (l in 2:length(data2D)) | 91 for (l in 2:length(data2D)) { |
92 sequencesCombination <- merge.data.frame(sequencesCombination, data2D[l], by = "Metabolite", all.x = TRUE, all.y = TRUE) | 92 sequencesCombination <- merge.data.frame(sequencesCombination, data2D[l], by = "Metabolite", all.x = TRUE, all.y = TRUE) |
93 } | |
93 | 94 |
94 ## Replacement of NA values due to mis annotation | 95 ## Replacement of NA values due to mis annotation |
95 for (m in seq_len(nrow(sequencesCombination))) { | 96 for (m in seq_len(nrow(sequencesCombination))) { |
96 COSYcompound <- sort(names(BdDReference_COSY)) | 97 COSYcompound <- sort(names(BdDReference_COSY)) |
97 HMBCcompound <- sort(names(BdDReference_HMBC)) | 98 HMBCcompound <- sort(names(BdDReference_HMBC)) |
98 HSQCcompound <- sort(names(BdDReference_HSQC)) | 99 HSQCcompound <- sort(names(BdDReference_HSQC)) |
99 JREScompound <- sort(names(BdDReference_JRES)) | 100 JREScompound <- sort(names(BdDReference_JRES)) |
100 TOCSYcompound <- sort(names(BdDReference_TOCSY)) | 101 TOCSYcompound <- sort(names(BdDReference_TOCSY)) |
101 | 102 |
102 if (is.na(sequencesCombination[m, 2])) { | 103 if (is.na(sequencesCombination[m, 2])) { |
103 compound <- as.character(sequencesCombination[m, 1]) | 104 compound <- as.character(sequencesCombination[m, 1]) |
104 for (c in seq_len(length(COSYcompound))) | 105 for (c in seq_len(length(COSYcompound))) { |
105 if (str_to_lower(compound) == str_to_lower(COSYcompound[c])) | 106 if (str_to_lower(compound) == str_to_lower(COSYcompound[c])) { |
106 sequencesCombination[m, 2] <- 0 | 107 sequencesCombination[m, 2] <- 0 |
108 } | |
109 } | |
110 } | |
111 | |
112 if (is.na(sequencesCombination[m, 3])) { | |
113 compound <- as.character(sequencesCombination[m, 1]) | |
114 for (c in seq_len(length(HMBCcompound))) { | |
115 if (str_to_lower(compound) == str_to_lower(HMBCcompound[c])) { | |
116 sequencesCombination[m, 3] <- 0 | |
117 } | |
118 } | |
119 } | |
120 | |
121 if (is.na(sequencesCombination[m, 4])) { | |
122 compound <- as.character(sequencesCombination[m, 1]) | |
123 for (c in seq_len(length(HSQCcompound))) { | |
124 if (str_to_lower(compound) == str_to_lower(HSQCcompound[c])) { | |
125 sequencesCombination[m, 4] <- 0 | |
126 } | |
127 } | |
128 } | |
129 | |
130 if (is.na(sequencesCombination[m, 5])) { | |
131 compound <- as.character(sequencesCombination[m, 1]) | |
132 for (c in seq_len(length(JREScompound))) { | |
133 if (str_to_lower(compound) == str_to_lower(JREScompound[c])) { | |
134 sequencesCombination[m, 5] <- 0 | |
135 } | |
136 } | |
137 } | |
138 | |
139 if (is.na(sequencesCombination[m, 6])) { | |
140 compound <- as.character(sequencesCombination[m, 1]) | |
141 for (c in seq_len(length(TOCSYcompound))) { | |
142 if (str_to_lower(compound) == str_to_lower(TOCSYcompound[c])) { | |
143 sequencesCombination[m, 6] <- 0 | |
144 } | |
145 } | |
146 } | |
147 } | |
148 | |
149 ## SCORE MOYEN (sans prise en compte valeurs manquantes) | |
150 meanScore <- round(apply(sequencesCombination[, -1], 1, FUN = mean.rmNa), 2) | |
151 seqCombiMeanScore <- cbind.data.frame(sequencesCombination, averageScore = meanScore) | |
152 | |
153 ## SUPPRESSION METABOLITE AVEC SCORE MOYEN < SEUIL | |
154 seqCombiMeanScoreSeuilFiltre <- seqCombiMeanScore[seqCombiMeanScore$averageScore > seuil, ] | |
107 } | 155 } |
108 | 156 |
109 if (is.na(sequencesCombination[m, 3])) { | 157 return(list(COSY = annotationCOSY, HMBC = annotationHMBC, HSQC = annotationHSQC, JRES = annotationJRES, TOCSY = annotationTOCSY, combination = seqCombiMeanScoreSeuilFiltre)) |
110 compound <- as.character(sequencesCombination[m, 1]) | |
111 for (c in seq_len(length(HMBCcompound))) | |
112 if (str_to_lower(compound) == str_to_lower(HMBCcompound[c])) | |
113 sequencesCombination[m, 3] <- 0 | |
114 } | |
115 | |
116 if (is.na(sequencesCombination[m, 4])) { | |
117 compound <- as.character(sequencesCombination[m, 1]) | |
118 for (c in seq_len(length(HSQCcompound))) | |
119 if (str_to_lower(compound) == str_to_lower(HSQCcompound[c])) | |
120 sequencesCombination[m, 4] <- 0 | |
121 } | |
122 | |
123 if (is.na(sequencesCombination[m, 5])) { | |
124 compound <- as.character(sequencesCombination[m, 1]) | |
125 for (c in seq_len(length(JREScompound))) | |
126 if (str_to_lower(compound) == str_to_lower(JREScompound[c])) | |
127 sequencesCombination[m, 5] <- 0 | |
128 } | |
129 | |
130 if (is.na(sequencesCombination[m, 6])) { | |
131 compound <- as.character(sequencesCombination[m, 1]) | |
132 for (c in seq_len(length(TOCSYcompound))) | |
133 if (str_to_lower(compound) == str_to_lower(TOCSYcompound[c])) | |
134 sequencesCombination[m, 6] <- 0 | |
135 } | |
136 } | |
137 | |
138 ## SCORE MOYEN (sans prise en compte valeurs manquantes) | |
139 meanScore <- round(apply(sequencesCombination[, -1], 1, FUN = mean.rmNa), 2) | |
140 seqCombiMeanScore <- cbind.data.frame(sequencesCombination, averageScore = meanScore) | |
141 | |
142 ## SUPPRESSION METABOLITE AVEC SCORE MOYEN < SEUIL | |
143 seqCombiMeanScoreSeuilFiltre <- seqCombiMeanScore[seqCombiMeanScore$averageScore > seuil, ] | |
144 } | |
145 | |
146 return(list(COSY = annotationCOSY, HMBC = annotationHMBC, HSQC = annotationHSQC, JRES = annotationJRES, TOCSY = annotationTOCSY, combination = seqCombiMeanScoreSeuilFiltre)) | |
147 } | 158 } |