Mercurial > repos > marie-tremblay-metatoul > 2dnmrannotation
comparison annotationRmn2DGlobale.R @ 3:546c7ccd2ed4 draft default tip
"planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 911f4beba3dcb25c1033e8239426f8f763683523"
author | workflow4metabolomics |
---|---|
date | Fri, 04 Feb 2022 09:01:11 +0000 |
parents | dff7bde22102 |
children |
comparison
equal
deleted
inserted
replaced
2:dff7bde22102 | 3:546c7ccd2ed4 |
---|---|
1 ########################################################################################################################################### | 1 ################################################################################################### |
2 # ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE (OU PLUSIEURS) SEQUENCE(s) RMN # | 2 # ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE (OU PLUSIEURS) SEQUENCE(s) # |
3 # template : dataframe contenant la liste des couples de deplacements chimiques de la matrice complexe a annoter # | 3 # template : dataframe contenant la liste des couples de deplacements chimiques de la matrice complexe a annoter # |
4 # cosy : 1 si sequence a utiliser / 0 sinon # | 4 # cosy : 1 si sequence a utiliser / 0 sinon # |
5 # hmbc : 1 si sequence a utiliser / 0 sinon # | 5 # hmbc : 1 si sequence a utiliser / 0 sinon # |
6 # hsqc : 1 si sequence a utiliser / 0 sinon # | 6 # hsqc : 1 si sequence a utiliser / 0 sinon # |
7 # jres : 1 si sequence a utiliser / 0 sinon # | 7 # jres : 1 si sequence a utiliser / 0 sinon # |
8 # tocsy : 1 si sequence a utiliser / 0 sinon # | 8 # tocsy : 1 si sequence a utiliser / 0 sinon # |
9 # tolPpm1 : tolerance autorisee autour de la valeur1 du couple de deplacements chimiques # | 9 # tolPpm1 : tolerance autorisee autour de la valeur1 du couple de deplacements chimiques # |
10 # tolPpm2HJRes : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si H dans dimension 2 # | 10 # tolPpm2HJRes : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si H dans dimension 2 # |
11 # tolPpm2C : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si C dans dimension 2 # | 11 # tolPpm2C : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si C dans dimension 2 # |
12 # seuil : valeur du score de presence en deça de laquelle les metabolites annotes ne sont pas retenus # | 12 # seuil : valeur du score de presence en dela de laquelle les metabolites annotes ne sont pas retenus # |
13 # unicite : boolean pour ne retenir que les ... # | 13 # unicite : boolean pour ne retenir que les ... # |
14 ########################################################################################################################################### | 14 ################################################################################################### |
15 ## CALCUL MOYENNE SANS VALEUR(S) MANQUANTE(S) | 15 ## CALCUL MOYENNE SANS VALEUR(S) MANQUANTE(S) |
16 mean.rmNa <- function(x) | 16 mean.rmNa <- function(x) { |
17 { | 17 mean(x, na.rm = TRUE) |
18 mean(x, na.rm=TRUE) | |
19 } | 18 } |
20 | 19 |
21 annotationRmn2DGlobale <- function(template, tolPpm1=0.01, tolPpm2HJRes=0.002, tolPpm2C=0.5, cosy=1, hmbc=1, hsqc=1, jres=1, tocsy=1, | 20 annotationRmn2DGlobale <- function(template, tolPpm1 = 0.01, tolPpm2HJRes = 0.002, tolPpm2C = 0.5, cosy = 1, hmbc = 1, hsqc = 1, jres = 1, tocsy = 1, seuil, unicite = "NO") { |
22 seuil, unicite="NO") | |
23 { | |
24 ## Initialisation | 21 ## Initialisation |
25 options (max.print=999999999) | 22 options(max.print = 999999999) |
26 annotationCOSY <- data.frame() | 23 annotationCOSY <- data.frame() |
27 annotationHMBC <- data.frame() | 24 annotationHMBC <- data.frame() |
28 annotationHSQC <- data.frame() | 25 annotationHSQC <- data.frame() |
29 annotationJRES <- data.frame() | 26 annotationJRES <- data.frame() |
30 annotationTOCSY <- data.frame() | 27 annotationTOCSY <- data.frame() |
32 dataCOSY <- "NA" | 29 dataCOSY <- "NA" |
33 dataHMBC <- "NA" | 30 dataHMBC <- "NA" |
34 dataHSQC <- "NA" | 31 dataHSQC <- "NA" |
35 dataJRES <- "NA" | 32 dataJRES <- "NA" |
36 dataTOCSY <- "NA" | 33 dataTOCSY <- "NA" |
37 | 34 |
38 ## Application seuil seulement si annotation avec 1 seule sequence | 35 ## Application seuil seulement si annotation avec 1 seule sequence |
39 ## seuilPls2D <- 0 | |
40 ## if ((sum(cosy, hmbc, hsqc, jres, tocsy)) == 1) | |
41 ## seuilPls2D <- seuil | |
42 seuilPls2D <- seuil | 36 seuilPls2D <- seuil |
43 | 37 |
44 if (cosy == 1) | 38 if (cosy == 1) { |
45 { | 39 matrice.cosy <- read.xlsx(template, sheet = "COSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") |
46 matrice.cosy <- read.xlsx(template, sheet="COSY", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA") | |
47 matrice.cosy <- matrice.cosy[matrice.cosy$peak.index != "x", ] | 40 matrice.cosy <- matrice.cosy[matrice.cosy$peak.index != "x", ] |
48 annotationCOSY <- annotationRmn2D(matrice.cosy, BdDReference_COSY, "COSY", ppm1Tol=tolPpm1, ppm2Tol=tolPpm1, seuil=seuilPls2D, | 41 annotationCOSY <- annotationRmn2D(matrice.cosy, BdDReference_COSY, "COSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) |
49 unicite=unicite) | 42 dataCOSY <- data.frame(Metabolite = str_to_lower(annotationCOSY$liste_resultat$Metabolite), score.COSY = annotationCOSY$liste_resultat$score) |
50 dataCOSY <- data.frame(Metabolite=str_to_lower(annotationCOSY$liste_resultat$Metabolite), score.COSY=annotationCOSY$liste_resultat$score) | |
51 dataCOSY <- unique.data.frame(dataCOSY) | 43 dataCOSY <- unique.data.frame(dataCOSY) |
52 } | 44 } |
53 | 45 |
54 if (hmbc == 1) | 46 if (hmbc == 1) { |
55 { | 47 matrice.hmbc <- read.xlsx(template, sheet = "HMBC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") |
56 matrice.hmbc <- read.xlsx(template, sheet="HMBC", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA") | |
57 matrice.hmbc <- matrice.hmbc[matrice.hmbc$peak.index != "x", ] | 48 matrice.hmbc <- matrice.hmbc[matrice.hmbc$peak.index != "x", ] |
58 annotationHMBC <- annotationRmn2D(matrice.hmbc, BdDReference_HMBC, "HMBC", ppm1Tol=tolPpm1, ppm2Tol=tolPpm2C, seuil=seuilPls2D, | 49 annotationHMBC <- annotationRmn2D(matrice.hmbc, BdDReference_HMBC, "HMBC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) |
59 unicite=unicite) | 50 dataHMBC <- data.frame(Metabolite = str_to_lower(annotationHMBC$liste_resultat$Metabolite), score.HMBC = annotationHMBC$liste_resultat$score) |
60 dataHMBC <- data.frame(Metabolite=str_to_lower(annotationHMBC$liste_resultat$Metabolite), score.HMBC=annotationHMBC$liste_resultat$score) | |
61 dataHMBC <- unique.data.frame(dataHMBC) | 51 dataHMBC <- unique.data.frame(dataHMBC) |
62 } | 52 } |
63 | 53 |
64 if (hsqc == 1) | 54 if (hsqc == 1) { |
65 { | 55 matrice.hsqc <- read.xlsx(template, sheet = "HSQC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") |
66 matrice.hsqc <- read.xlsx(template, sheet="HSQC", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA") | |
67 matrice.hsqc <- matrice.hsqc[matrice.hsqc$peak.index != "x", ] | 56 matrice.hsqc <- matrice.hsqc[matrice.hsqc$peak.index != "x", ] |
68 annotationHSQC <- annotationRmn2D(matrice.hsqc, BdDReference_HSQC, "HSQC", ppm1Tol=tolPpm1, ppm2Tol=tolPpm2C, seuil=seuilPls2D, | 57 annotationHSQC <- annotationRmn2D(matrice.hsqc, BdDReference_HSQC, "HSQC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) |
69 unicite=unicite) | 58 dataHSQC <- data.frame(Metabolite = str_to_lower(annotationHSQC$liste_resultat$Metabolite), score.HSQC = annotationHSQC$liste_resultat$score) |
70 dataHSQC <- data.frame(Metabolite=str_to_lower(annotationHSQC$liste_resultat$Metabolite), score.HSQC=annotationHSQC$liste_resultat$score) | |
71 dataHSQC <- unique.data.frame(dataHSQC) | 59 dataHSQC <- unique.data.frame(dataHSQC) |
72 } | 60 } |
73 | 61 |
74 if (jres == 1) | 62 if (jres == 1) { |
75 { | 63 matrice.jres <- read.xlsx(template, sheet = "JRES", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") |
76 matrice.jres <- read.xlsx(template, sheet="JRES", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA") | |
77 matrice.jres <- matrice.jres[matrice.jres$peak.index != "x", ] | 64 matrice.jres <- matrice.jres[matrice.jres$peak.index != "x", ] |
78 annotationJRES <- annotationRmn2D(matrice.jres, BdDReference_JRES, "JRES", ppm1Tol=tolPpm1, ppm2Tol=tolPpm2HJRes, seuil=seuilPls2D, | 65 annotationJRES <- annotationRmn2D(matrice.jres, BdDReference_JRES, "JRES", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2HJRes, seuil = seuilPls2D, unicite = unicite) |
79 unicite=unicite) | 66 dataJRES <- data.frame(Metabolite = str_to_lower(annotationJRES$liste_resultat$Metabolite), score.JRES = annotationJRES$liste_resultat$score) |
80 dataJRES <- data.frame(Metabolite=str_to_lower(annotationJRES$liste_resultat$Metabolite), score.JRES=annotationJRES$liste_resultat$score) | |
81 dataJRES <- unique.data.frame(dataJRES) | 67 dataJRES <- unique.data.frame(dataJRES) |
82 } | 68 } |
83 | 69 |
84 if (tocsy == 1) | 70 if (tocsy == 1) { |
85 { | 71 matrice.tocsy <- read.xlsx(template, sheet = "TOCSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") |
86 matrice.tocsy <- read.xlsx(template, sheet="TOCSY", startRow=2, colNames=TRUE, rowNames=FALSE, cols=1:3, na.strings="NA") | |
87 matrice.tocsy <- matrice.tocsy[matrice.tocsy$peak.index != "x", ] | 72 matrice.tocsy <- matrice.tocsy[matrice.tocsy$peak.index != "x", ] |
88 annotationTOCSY <- annotationRmn2D(matrice.tocsy, BdDReference_TOCSY, "TOCSY", ppm1Tol=tolPpm1, ppm2Tol=tolPpm1, seuil=seuilPls2D, | 73 annotationTOCSY <- annotationRmn2D(matrice.tocsy, BdDReference_TOCSY, "TOCSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) |
89 unicite=unicite) | 74 dataTOCSY <- data.frame(Metabolite = str_to_lower(annotationTOCSY$liste_resultat$Metabolite), score.TOCSY = annotationTOCSY$liste_resultat$score) |
90 dataTOCSY <- data.frame(Metabolite=str_to_lower(annotationTOCSY$liste_resultat$Metabolite), score.TOCSY=annotationTOCSY$liste_resultat$score) | |
91 dataTOCSY <- unique.data.frame(dataTOCSY) | 75 dataTOCSY <- unique.data.frame(dataTOCSY) |
92 } | 76 } |
93 | 77 |
94 sequencesCombinationAverageScoreSeuil <- data.frame() | 78 seqCombiMeanScoreSeuil <- data.frame() |
95 sequencesCombinationAverageScoreSeuilFiltre <- data.frame() | 79 seqCombiMeanScoreSeuilFiltre <- data.frame() |
96 | 80 |
97 ## CONCATENATION RESULTATS DIFFERENTES SEQUENCES | 81 ## CONCATENATION RESULTATS DIFFERENTES SEQUENCES |
98 data2D <- list(dataCOSY, dataHMBC, dataHSQC, dataJRES, dataTOCSY) | 82 data2D <- list(dataCOSY, dataHMBC, dataHSQC, dataJRES, dataTOCSY) |
99 whichSequenceNaN <- which((data2D != "NA")) | 83 whichSequenceNaN <- which((data2D != "NA")) |
100 data2D <- data2D[whichSequenceNaN] | 84 data2D <- data2D[whichSequenceNaN] |
101 sequencesCombination <- data.frame(data2D[1]) | 85 sequencesCombination <- data.frame(data2D[1]) |
102 sequencesCombinationAverageScore <- sequencesCombination | 86 seqCombiMeanScore <- sequencesCombination |
103 | 87 |
104 ## Si une seule sequence et seuil sur score = filtre applique dans la fonction annotationRmn2D | 88 ## Si une seule sequence et seuil sur score = filtre applique dans la fonction annotationRmn2D |
105 if (length(data2D) >= 2) | 89 if (length(data2D) >= 2) { |
106 { | |
107 ## CONCATENATION SCORE PAR SEQUENCE | 90 ## CONCATENATION SCORE PAR SEQUENCE |
108 for (l in 2:length(data2D)) | 91 for (l in 2:length(data2D)) |
109 sequencesCombination <- merge.data.frame(sequencesCombination, data2D[l], by="Metabolite", all.x=TRUE, all.y=TRUE) | 92 sequencesCombination <- merge.data.frame(sequencesCombination, data2D[l], by = "Metabolite", all.x = TRUE, all.y = TRUE) |
110 | 93 |
111 ## SCORE MOYEN (sans prise en compte valeurs manquantes) | 94 ## Replacement of NA values due to mis annotation |
112 meanScore <- apply(sequencesCombination[, -1], 1, FUN=mean.rmNa) | 95 for (m in seq_len(nrow(sequencesCombination))) { |
113 sequencesCombinationAverageScore <- cbind.data.frame(sequencesCombination, averageScore=meanScore) | 96 COSYcompound <- sort(names(BdDReference_COSY)) |
114 ## SUPPRESSION METABOLITE AVEC SCORE MOYEN < SEUIL | 97 HMBCcompound <- sort(names(BdDReference_HMBC)) |
115 ## sequencesCombinationAverageScoreSeuilFiltre <- filter(sequencesCombinationAverageScore, averageScore >= seuil) | 98 HSQCcompound <- sort(names(BdDReference_HSQC)) |
116 sequencesCombinationAverageScoreSeuilFiltre <- sequencesCombinationAverageScore[sequencesCombinationAverageScore$averageScore > seuil, ] | 99 JREScompound <- sort(names(BdDReference_JRES)) |
100 TOCSYcompound <- sort(names(BdDReference_TOCSY)) | |
101 | |
102 if (is.na(sequencesCombination[m, 2])) { | |
103 compound <- as.character(sequencesCombination[m, 1]) | |
104 for (c in seq_len(length(COSYcompound))) | |
105 if (str_to_lower(compound) == str_to_lower(COSYcompound[c])) | |
106 sequencesCombination[m, 2] <- 0 | |
107 } | |
108 | |
109 if (is.na(sequencesCombination[m, 3])) { | |
110 compound <- as.character(sequencesCombination[m, 1]) | |
111 for (c in seq_len(length(HMBCcompound))) | |
112 if (str_to_lower(compound) == str_to_lower(HMBCcompound[c])) | |
113 sequencesCombination[m, 3] <- 0 | |
114 } | |
115 | |
116 if (is.na(sequencesCombination[m, 4])) { | |
117 compound <- as.character(sequencesCombination[m, 1]) | |
118 for (c in seq_len(length(HSQCcompound))) | |
119 if (str_to_lower(compound) == str_to_lower(HSQCcompound[c])) | |
120 sequencesCombination[m, 4] <- 0 | |
121 } | |
122 | |
123 if (is.na(sequencesCombination[m, 5])) { | |
124 compound <- as.character(sequencesCombination[m, 1]) | |
125 for (c in seq_len(length(JREScompound))) | |
126 if (str_to_lower(compound) == str_to_lower(JREScompound[c])) | |
127 sequencesCombination[m, 5] <- 0 | |
128 } | |
129 | |
130 if (is.na(sequencesCombination[m, 6])) { | |
131 compound <- as.character(sequencesCombination[m, 1]) | |
132 for (c in seq_len(length(TOCSYcompound))) | |
133 if (str_to_lower(compound) == str_to_lower(TOCSYcompound[c])) | |
134 sequencesCombination[m, 6] <- 0 | |
135 } | |
117 } | 136 } |
118 | 137 |
119 return(list(COSY=annotationCOSY, HMBC=annotationHMBC, HSQC=annotationHSQC, JRES=annotationJRES, TOCSY=annotationTOCSY, | 138 ## SCORE MOYEN (sans prise en compte valeurs manquantes) |
120 combination=sequencesCombinationAverageScoreSeuilFiltre)) | 139 meanScore <- round(apply(sequencesCombination[, -1], 1, FUN = mean.rmNa), 2) |
140 seqCombiMeanScore <- cbind.data.frame(sequencesCombination, averageScore = meanScore) | |
141 | |
142 ## SUPPRESSION METABOLITE AVEC SCORE MOYEN < SEUIL | |
143 seqCombiMeanScoreSeuilFiltre <- seqCombiMeanScore[seqCombiMeanScore$averageScore > seuil, ] | |
144 } | |
145 | |
146 return(list(COSY = annotationCOSY, HMBC = annotationHMBC, HSQC = annotationHSQC, JRES = annotationJRES, TOCSY = annotationTOCSY, combination = seqCombiMeanScoreSeuilFiltre)) | |
121 } | 147 } |