comparison annotationRmn2DGlobale.R @ 4:cf11fa0c47c8 draft default tip

planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit b5f7f56b5ffc3c900236c077f72b321df20647be
author workflow4metabolomics
date Thu, 23 Jan 2025 15:28:44 +0000
parents 546c7ccd2ed4
children
comparison
equal deleted inserted replaced
3:546c7ccd2ed4 4:cf11fa0c47c8
12 # seuil : valeur du score de presence en dela de laquelle les metabolites annotes ne sont pas retenus # 12 # seuil : valeur du score de presence en dela de laquelle les metabolites annotes ne sont pas retenus #
13 # unicite : boolean pour ne retenir que les ... # 13 # unicite : boolean pour ne retenir que les ... #
14 ################################################################################################### 14 ###################################################################################################
15 ## CALCUL MOYENNE SANS VALEUR(S) MANQUANTE(S) 15 ## CALCUL MOYENNE SANS VALEUR(S) MANQUANTE(S)
16 mean.rmNa <- function(x) { 16 mean.rmNa <- function(x) {
17 mean(x, na.rm = TRUE) 17 mean(x, na.rm = TRUE)
18 } 18 }
19 19
20 annotationRmn2DGlobale <- function(template, tolPpm1 = 0.01, tolPpm2HJRes = 0.002, tolPpm2C = 0.5, cosy = 1, hmbc = 1, hsqc = 1, jres = 1, tocsy = 1, seuil, unicite = "NO") { 20 annotationRmn2DGlobale <- function(template, tolPpm1 = 0.01, tolPpm2HJRes = 0.002, tolPpm2C = 0.5, cosy = 1, hmbc = 1, hsqc = 1, jres = 1, tocsy = 1, seuil, unicite = "NO") {
21 ## Initialisation 21 ## Initialisation
22 options(max.print = 999999999) 22 options(max.print = 999999999)
23 annotationCOSY <- data.frame() 23 annotationCOSY <- data.frame()
24 annotationHMBC <- data.frame() 24 annotationHMBC <- data.frame()
25 annotationHSQC <- data.frame() 25 annotationHSQC <- data.frame()
26 annotationJRES <- data.frame() 26 annotationJRES <- data.frame()
27 annotationTOCSY <- data.frame() 27 annotationTOCSY <- data.frame()
28 28
29 dataCOSY <- "NA" 29 dataCOSY <- "NA"
30 dataHMBC <- "NA" 30 dataHMBC <- "NA"
31 dataHSQC <- "NA" 31 dataHSQC <- "NA"
32 dataJRES <- "NA" 32 dataJRES <- "NA"
33 dataTOCSY <- "NA" 33 dataTOCSY <- "NA"
34 34
35 ## Application seuil seulement si annotation avec 1 seule sequence 35 ## Application seuil seulement si annotation avec 1 seule sequence
36 seuilPls2D <- seuil 36 seuilPls2D <- seuil
37 37
38 if (cosy == 1) { 38 if (cosy == 1) {
39 matrice.cosy <- read.xlsx(template, sheet = "COSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") 39 matrice.cosy <- read.xlsx(template, sheet = "COSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA")
40 matrice.cosy <- matrice.cosy[matrice.cosy$peak.index != "x", ] 40 matrice.cosy <- matrice.cosy[matrice.cosy$peak.index != "x", ]
41 annotationCOSY <- annotationRmn2D(matrice.cosy, BdDReference_COSY, "COSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) 41 annotationCOSY <- annotationRmn2D(matrice.cosy, BdDReference_COSY, "COSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite)
42 dataCOSY <- data.frame(Metabolite = str_to_lower(annotationCOSY$liste_resultat$Metabolite), score.COSY = annotationCOSY$liste_resultat$score) 42 dataCOSY <- data.frame(Metabolite = str_to_lower(annotationCOSY$liste_resultat$Metabolite), score.COSY = annotationCOSY$liste_resultat$score)
43 dataCOSY <- unique.data.frame(dataCOSY) 43 dataCOSY <- unique.data.frame(dataCOSY)
44 } 44 }
45 45
46 if (hmbc == 1) { 46 if (hmbc == 1) {
47 matrice.hmbc <- read.xlsx(template, sheet = "HMBC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") 47 matrice.hmbc <- read.xlsx(template, sheet = "HMBC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA")
48 matrice.hmbc <- matrice.hmbc[matrice.hmbc$peak.index != "x", ] 48 matrice.hmbc <- matrice.hmbc[matrice.hmbc$peak.index != "x", ]
49 annotationHMBC <- annotationRmn2D(matrice.hmbc, BdDReference_HMBC, "HMBC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) 49 annotationHMBC <- annotationRmn2D(matrice.hmbc, BdDReference_HMBC, "HMBC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite)
50 dataHMBC <- data.frame(Metabolite = str_to_lower(annotationHMBC$liste_resultat$Metabolite), score.HMBC = annotationHMBC$liste_resultat$score) 50 dataHMBC <- data.frame(Metabolite = str_to_lower(annotationHMBC$liste_resultat$Metabolite), score.HMBC = annotationHMBC$liste_resultat$score)
51 dataHMBC <- unique.data.frame(dataHMBC) 51 dataHMBC <- unique.data.frame(dataHMBC)
52 } 52 }
53 53
54 if (hsqc == 1) { 54 if (hsqc == 1) {
55 matrice.hsqc <- read.xlsx(template, sheet = "HSQC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") 55 matrice.hsqc <- read.xlsx(template, sheet = "HSQC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA")
56 matrice.hsqc <- matrice.hsqc[matrice.hsqc$peak.index != "x", ] 56 matrice.hsqc <- matrice.hsqc[matrice.hsqc$peak.index != "x", ]
57 annotationHSQC <- annotationRmn2D(matrice.hsqc, BdDReference_HSQC, "HSQC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) 57 annotationHSQC <- annotationRmn2D(matrice.hsqc, BdDReference_HSQC, "HSQC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite)
58 dataHSQC <- data.frame(Metabolite = str_to_lower(annotationHSQC$liste_resultat$Metabolite), score.HSQC = annotationHSQC$liste_resultat$score) 58 dataHSQC <- data.frame(Metabolite = str_to_lower(annotationHSQC$liste_resultat$Metabolite), score.HSQC = annotationHSQC$liste_resultat$score)
59 dataHSQC <- unique.data.frame(dataHSQC) 59 dataHSQC <- unique.data.frame(dataHSQC)
60 } 60 }
61 61
62 if (jres == 1) { 62 if (jres == 1) {
63 matrice.jres <- read.xlsx(template, sheet = "JRES", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") 63 matrice.jres <- read.xlsx(template, sheet = "JRES", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA")
64 matrice.jres <- matrice.jres[matrice.jres$peak.index != "x", ] 64 matrice.jres <- matrice.jres[matrice.jres$peak.index != "x", ]
65 annotationJRES <- annotationRmn2D(matrice.jres, BdDReference_JRES, "JRES", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2HJRes, seuil = seuilPls2D, unicite = unicite) 65 annotationJRES <- annotationRmn2D(matrice.jres, BdDReference_JRES, "JRES", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2HJRes, seuil = seuilPls2D, unicite = unicite)
66 dataJRES <- data.frame(Metabolite = str_to_lower(annotationJRES$liste_resultat$Metabolite), score.JRES = annotationJRES$liste_resultat$score) 66 dataJRES <- data.frame(Metabolite = str_to_lower(annotationJRES$liste_resultat$Metabolite), score.JRES = annotationJRES$liste_resultat$score)
67 dataJRES <- unique.data.frame(dataJRES) 67 dataJRES <- unique.data.frame(dataJRES)
68 } 68 }
69 69
70 if (tocsy == 1) { 70 if (tocsy == 1) {
71 matrice.tocsy <- read.xlsx(template, sheet = "TOCSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") 71 matrice.tocsy <- read.xlsx(template, sheet = "TOCSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA")
72 matrice.tocsy <- matrice.tocsy[matrice.tocsy$peak.index != "x", ] 72 matrice.tocsy <- matrice.tocsy[matrice.tocsy$peak.index != "x", ]
73 annotationTOCSY <- annotationRmn2D(matrice.tocsy, BdDReference_TOCSY, "TOCSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) 73 annotationTOCSY <- annotationRmn2D(matrice.tocsy, BdDReference_TOCSY, "TOCSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite)
74 dataTOCSY <- data.frame(Metabolite = str_to_lower(annotationTOCSY$liste_resultat$Metabolite), score.TOCSY = annotationTOCSY$liste_resultat$score) 74 dataTOCSY <- data.frame(Metabolite = str_to_lower(annotationTOCSY$liste_resultat$Metabolite), score.TOCSY = annotationTOCSY$liste_resultat$score)
75 dataTOCSY <- unique.data.frame(dataTOCSY) 75 dataTOCSY <- unique.data.frame(dataTOCSY)
76 } 76 }
77 77
78 seqCombiMeanScoreSeuil <- data.frame() 78 seqCombiMeanScoreSeuil <- data.frame()
79 seqCombiMeanScoreSeuilFiltre <- data.frame() 79 seqCombiMeanScoreSeuilFiltre <- data.frame()
80 80
81 ## CONCATENATION RESULTATS DIFFERENTES SEQUENCES 81 ## CONCATENATION RESULTATS DIFFERENTES SEQUENCES
82 data2D <- list(dataCOSY, dataHMBC, dataHSQC, dataJRES, dataTOCSY) 82 data2D <- list(dataCOSY, dataHMBC, dataHSQC, dataJRES, dataTOCSY)
83 whichSequenceNaN <- which((data2D != "NA")) 83 whichSequenceNaN <- which((data2D != "NA"))
84 data2D <- data2D[whichSequenceNaN] 84 data2D <- data2D[whichSequenceNaN]
85 sequencesCombination <- data.frame(data2D[1]) 85 sequencesCombination <- data.frame(data2D[1])
86 seqCombiMeanScore <- sequencesCombination 86 seqCombiMeanScore <- sequencesCombination
87 87
88 ## Si une seule sequence et seuil sur score = filtre applique dans la fonction annotationRmn2D 88 ## Si une seule sequence et seuil sur score = filtre applique dans la fonction annotationRmn2D
89 if (length(data2D) >= 2) { 89 if (length(data2D) >= 2) {
90 ## CONCATENATION SCORE PAR SEQUENCE 90 ## CONCATENATION SCORE PAR SEQUENCE
91 for (l in 2:length(data2D)) 91 for (l in 2:length(data2D)) {
92 sequencesCombination <- merge.data.frame(sequencesCombination, data2D[l], by = "Metabolite", all.x = TRUE, all.y = TRUE) 92 sequencesCombination <- merge.data.frame(sequencesCombination, data2D[l], by = "Metabolite", all.x = TRUE, all.y = TRUE)
93 }
93 94
94 ## Replacement of NA values due to mis annotation 95 ## Replacement of NA values due to mis annotation
95 for (m in seq_len(nrow(sequencesCombination))) { 96 for (m in seq_len(nrow(sequencesCombination))) {
96 COSYcompound <- sort(names(BdDReference_COSY)) 97 COSYcompound <- sort(names(BdDReference_COSY))
97 HMBCcompound <- sort(names(BdDReference_HMBC)) 98 HMBCcompound <- sort(names(BdDReference_HMBC))
98 HSQCcompound <- sort(names(BdDReference_HSQC)) 99 HSQCcompound <- sort(names(BdDReference_HSQC))
99 JREScompound <- sort(names(BdDReference_JRES)) 100 JREScompound <- sort(names(BdDReference_JRES))
100 TOCSYcompound <- sort(names(BdDReference_TOCSY)) 101 TOCSYcompound <- sort(names(BdDReference_TOCSY))
101 102
102 if (is.na(sequencesCombination[m, 2])) { 103 if (is.na(sequencesCombination[m, 2])) {
103 compound <- as.character(sequencesCombination[m, 1]) 104 compound <- as.character(sequencesCombination[m, 1])
104 for (c in seq_len(length(COSYcompound))) 105 for (c in seq_len(length(COSYcompound))) {
105 if (str_to_lower(compound) == str_to_lower(COSYcompound[c])) 106 if (str_to_lower(compound) == str_to_lower(COSYcompound[c])) {
106 sequencesCombination[m, 2] <- 0 107 sequencesCombination[m, 2] <- 0
108 }
109 }
110 }
111
112 if (is.na(sequencesCombination[m, 3])) {
113 compound <- as.character(sequencesCombination[m, 1])
114 for (c in seq_len(length(HMBCcompound))) {
115 if (str_to_lower(compound) == str_to_lower(HMBCcompound[c])) {
116 sequencesCombination[m, 3] <- 0
117 }
118 }
119 }
120
121 if (is.na(sequencesCombination[m, 4])) {
122 compound <- as.character(sequencesCombination[m, 1])
123 for (c in seq_len(length(HSQCcompound))) {
124 if (str_to_lower(compound) == str_to_lower(HSQCcompound[c])) {
125 sequencesCombination[m, 4] <- 0
126 }
127 }
128 }
129
130 if (is.na(sequencesCombination[m, 5])) {
131 compound <- as.character(sequencesCombination[m, 1])
132 for (c in seq_len(length(JREScompound))) {
133 if (str_to_lower(compound) == str_to_lower(JREScompound[c])) {
134 sequencesCombination[m, 5] <- 0
135 }
136 }
137 }
138
139 if (is.na(sequencesCombination[m, 6])) {
140 compound <- as.character(sequencesCombination[m, 1])
141 for (c in seq_len(length(TOCSYcompound))) {
142 if (str_to_lower(compound) == str_to_lower(TOCSYcompound[c])) {
143 sequencesCombination[m, 6] <- 0
144 }
145 }
146 }
147 }
148
149 ## SCORE MOYEN (sans prise en compte valeurs manquantes)
150 meanScore <- round(apply(sequencesCombination[, -1], 1, FUN = mean.rmNa), 2)
151 seqCombiMeanScore <- cbind.data.frame(sequencesCombination, averageScore = meanScore)
152
153 ## SUPPRESSION METABOLITE AVEC SCORE MOYEN < SEUIL
154 seqCombiMeanScoreSeuilFiltre <- seqCombiMeanScore[seqCombiMeanScore$averageScore > seuil, ]
107 } 155 }
108 156
109 if (is.na(sequencesCombination[m, 3])) { 157 return(list(COSY = annotationCOSY, HMBC = annotationHMBC, HSQC = annotationHSQC, JRES = annotationJRES, TOCSY = annotationTOCSY, combination = seqCombiMeanScoreSeuilFiltre))
110 compound <- as.character(sequencesCombination[m, 1])
111 for (c in seq_len(length(HMBCcompound)))
112 if (str_to_lower(compound) == str_to_lower(HMBCcompound[c]))
113 sequencesCombination[m, 3] <- 0
114 }
115
116 if (is.na(sequencesCombination[m, 4])) {
117 compound <- as.character(sequencesCombination[m, 1])
118 for (c in seq_len(length(HSQCcompound)))
119 if (str_to_lower(compound) == str_to_lower(HSQCcompound[c]))
120 sequencesCombination[m, 4] <- 0
121 }
122
123 if (is.na(sequencesCombination[m, 5])) {
124 compound <- as.character(sequencesCombination[m, 1])
125 for (c in seq_len(length(JREScompound)))
126 if (str_to_lower(compound) == str_to_lower(JREScompound[c]))
127 sequencesCombination[m, 5] <- 0
128 }
129
130 if (is.na(sequencesCombination[m, 6])) {
131 compound <- as.character(sequencesCombination[m, 1])
132 for (c in seq_len(length(TOCSYcompound)))
133 if (str_to_lower(compound) == str_to_lower(TOCSYcompound[c]))
134 sequencesCombination[m, 6] <- 0
135 }
136 }
137
138 ## SCORE MOYEN (sans prise en compte valeurs manquantes)
139 meanScore <- round(apply(sequencesCombination[, -1], 1, FUN = mean.rmNa), 2)
140 seqCombiMeanScore <- cbind.data.frame(sequencesCombination, averageScore = meanScore)
141
142 ## SUPPRESSION METABOLITE AVEC SCORE MOYEN < SEUIL
143 seqCombiMeanScoreSeuilFiltre <- seqCombiMeanScore[seqCombiMeanScore$averageScore > seuil, ]
144 }
145
146 return(list(COSY = annotationCOSY, HMBC = annotationHMBC, HSQC = annotationHSQC, JRES = annotationJRES, TOCSY = annotationTOCSY, combination = seqCombiMeanScoreSeuilFiltre))
147 } 158 }