Mercurial > repos > marie-tremblay-metatoul > 2dnmrannotation
diff annotationRmn2DWrapper.R @ 3:546c7ccd2ed4 draft default tip
"planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 911f4beba3dcb25c1033e8239426f8f763683523"
author | workflow4metabolomics |
---|---|
date | Fri, 04 Feb 2022 09:01:11 +0000 |
parents | dff7bde22102 |
children |
line wrap: on
line diff
--- a/annotationRmn2DWrapper.R Tue Feb 04 10:59:26 2020 -0500 +++ b/annotationRmn2DWrapper.R Fri Feb 04 09:01:11 2022 +0000 @@ -3,12 +3,11 @@ ## 201919016 2DNmrAnnotation_1.0.0.R ## Marie Tremblay-Franco ## MetaboHUB: The French Infrastructure for Metabolomics and Fluxomics -## www.metabohub.fr/en -## marie.tremblay-franco@toulouse.inra.fr +## marie.tremblay-franco@inrae.fr runExampleL <- FALSE -if(runExampleL) { +if (runExampleL) { ##------------------------------ ## Example of arguments ##------------------------------ @@ -20,6 +19,7 @@ ##------------------------------ strAsFacL <- options()$stringsAsFactors options(stringsAsFactors = FALSE) +options(digits = 8, scipen = 3) ##------------------------------ ## Constants @@ -41,9 +41,12 @@ library(openxlsx) library(stringr) library(tidyr) +library(curl) +library(jsonlite) +library(stringi) -if(!runExampleL) - argLs <- parseCommandArgs(evaluate=FALSE) +if (!runExampleL) + argLs <- parseCommandArgs(evaluate = FALSE) logFile <- argLs[["logOut"]] sink(logFile) @@ -53,11 +56,10 @@ ##------------------------------ ## Functions ##------------------------------ -source_local <- function(fname) -{ - argv <- commandArgs(trailingOnly = FALSE) - base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) - source(paste(base_dir, fname, sep="/")) +source_local <- function(fname) { + argv <- commandArgs(trailingOnly = FALSE) + base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) + source(paste(base_dir, fname, sep = "/")) } #Import the different functions source_local("annotationRmn2D.R") @@ -66,78 +68,159 @@ ## Input parameter values fileToAnnotate <- argLs[[1]] + # Constraints values +ph <- argLs$pH +field <- argLs$magneticField + # Chosen sequence(s) cosy <- 0 hmbc <- 0 hsqc <- 0 jres <- 0 tocsy <- 0 -## sequences <- str_split(argLs[[2]], ",")[[1]] -## for (s in 1:length(sequences)) -## { -## argv <- commandArgs(trailingOnly = FALSE) -## currentDir <- dirname(substring(argv[grep("--file=", argv)], 8)) -## if (sequences[s]=="cosy"){ -## cosy <- 1 -## load(paste(currentDir, "BdDReference_COSY.RData", sep="/")) -## }else if(sequences[s]=="hmbc"){ -## hmbc <- 1 -## load(paste(currentDir, "BdDReference_HMBC.RData", sep="/")) -## }else if(sequences[s]=="hsqc"){ -## hsqc <- 1 -## load(paste(currentDir, "BdDReference_HSQC.RData", sep="/")) -## }else if(sequences[s]=="jres"){ -## jres <- 1 -## load(paste(currentDir, "BdDReference_JRES.RData", sep="/")) -## }else if(sequences[s]=="tocsy"){ -## tocsy <- 1 -## load(paste(currentDir, "BdDReference_TOCSY.RData", sep="/")) -## }else -## stop("No chosen sequence", call.=FALSE) -## } + +if (argLs$cosy_2dsequences == "yes") { + cosy <- 1 + peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=cosy&token=9131jq9l8gsjn1j14t351h716u&max=500")) + peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) + if (ph != 0) + peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] + if (field != 0) + peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] + + if (nrow(peakforestSpectra) != 0) { + BdDReference_COSY <- peakforestSpectra$peaks + names(BdDReference_COSY) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] + names(BdDReference_COSY) <- enc2utf8(names(BdDReference_COSY)) + names(BdDReference_COSY) <- str_replace_all(names(BdDReference_COSY), "\u00e9", "e") + + for (k in seq_len(length(BdDReference_COSY))) { + peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_COSY[[k]][, 2], ppm.dim2 = BdDReference_COSY[[k]][, 1], + BdDReference_COSY[[k]][, 3:ncol(BdDReference_COSY[[k]])]) + BdDReference_COSY[[k]] <- peakforestSpectra_df + } + } else { + stop("No COSY spectra correspond to requested pH and/or magnetic field", call. = FALSE) + } + rm(peakforestSpectra) + rm(peakforestSpectra_df) +} -if (argLs[[2]]=='yes') -{ - argv <- commandArgs(trailingOnly = FALSE) - currentDir <- dirname(substring(argv[grep("--file=", argv)], 8)) - cosy <- 1 - load(paste(currentDir, "BdDReference_COSY.RData", sep="/")) +if (argLs$hmbc_2dsequences == "yes") { + hmbc <- 1 + peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=hmbc&token=9131jq9l8gsjn1j14t351h716u&max=500")) + peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) + if (ph != 0) + peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] + if (field != 0) + peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] + + if (nrow(peakforestSpectra) != 0) { + + BdDReference_HMBC <- peakforestSpectra$peaks + names(BdDReference_HMBC) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] + names(BdDReference_HMBC) <- enc2utf8(names(BdDReference_HMBC)) + names(BdDReference_HMBC) <- str_replace_all(names(BdDReference_HMBC), "\u00e9", "e") + + peakforestSpectra_df <- data.frame() + for (k in seq_len(length(BdDReference_HMBC))) { + peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_HMBC[[k]][, 2], ppm.dim2 = BdDReference_HMBC[[k]][, 1], + BdDReference_HMBC[[k]][, 3:ncol(BdDReference_HMBC[[k]])]) + BdDReference_HMBC[[k]] <- peakforestSpectra_df + } + } else { + stop("No HMBC spectra correspond to requested pH and/or magnetic field", call. = FALSE) + } + rm(peakforestSpectra) + rm(peakforestSpectra_df) } -if (argLs[[3]]=='yes') -{ - argv <- commandArgs(trailingOnly = FALSE) - currentDir <- dirname(substring(argv[grep("--file=", argv)], 8)) - jres <- 1 - load(paste(currentDir, "BdDReference_JRES.RData", sep="/")) -} +if (argLs$hsqc_2dsequences == "yes") { + hsqc <- 1 + peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=hsqc&token=9131jq9l8gsjn1j14t351h716u&max=500")) + peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) + + if (ph != 0) + peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] + if (field != 0) + peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] -if (argLs[[4]]=='yes') -{ - argv <- commandArgs(trailingOnly = FALSE) - currentDir <- dirname(substring(argv[grep("--file=", argv)], 8)) - hmbc <- 1 - load(paste(currentDir, "BdDReference_HMBC.RData", sep="/")) + if (nrow(peakforestSpectra) != 0) { + BdDReference_HSQC <- peakforestSpectra$peaks + names(BdDReference_HSQC) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] + names(BdDReference_HSQC) <- enc2utf8(names(BdDReference_HSQC)) + names(BdDReference_HSQC) <- str_replace_all(names(BdDReference_HSQC), "\u00e9", "e") + + for (k in seq_len(length(BdDReference_HSQC))) { + peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_HSQC[[k]][, 2], ppm.dim2 = BdDReference_HSQC[[k]][, 1], + BdDReference_HSQC[[k]][, 3:ncol(BdDReference_HSQC[[k]])]) + BdDReference_HSQC[[k]] <- peakforestSpectra_df + } + } else { + stop("No HSQC spectra correspond to requested pH and/or magnetic field", call. = FALSE) + } + rm(peakforestSpectra) + rm(peakforestSpectra_df) } -if (argLs[[5]]=='yes') -{ - argv <- commandArgs(trailingOnly = FALSE) - currentDir <- dirname(substring(argv[grep("--file=", argv)], 8)) - hsqc <- 1 - load(paste(currentDir, "BdDReference_HSQC.RData", sep="/")) +if (argLs$jres_2dsequences == "yes") { + jres <- 1 + peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=jres&token=9131jq9l8gsjn1j14t351h716u&max=500")) + peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) + + if (ph != 0) + peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] + if (field != 0) + peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] + + if (nrow(peakforestSpectra) != 0) { + BdDReference_JRES <- peakforestSpectra$peaks + names(BdDReference_JRES) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] + names(BdDReference_JRES) <- enc2utf8(names(BdDReference_JRES)) + names(BdDReference_JRES) <- str_replace_all(names(BdDReference_JRES), "\u00e9", "e") + + for (k in seq_len(length(BdDReference_JRES))) { + peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_JRES[[k]][, 2], ppm.dim2 = BdDReference_JRES[[k]][, 1], + BdDReference_JRES[[k]][, 3:ncol(BdDReference_JRES[[k]])]) + BdDReference_JRES[[k]] <- peakforestSpectra_df + } + } else { + stop("No JRES spectra correspond to requested pH and/or magnetic field", call. = FALSE) + } + rm(peakforestSpectra) + rm(peakforestSpectra_df) } -if (argLs[[6]]=='yes') -{ - argv <- commandArgs(trailingOnly = FALSE) - currentDir <- dirname(substring(argv[grep("--file=", argv)], 8)) +if (argLs$tocsy_2dsequences == "yes") { tocsy <- 1 - load(paste(currentDir, "BdDReference_TOCSY.RData", sep="/")) + peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=tocsy&token=9131jq9l8gsjn1j14t351h716u&max=500")) + peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) + + if (ph != 0) + peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] + if (field != 0) + peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] + + if (nrow(peakforestSpectra) != 0) { + BdDReference_TOCSY <- peakforestSpectra$peaks + names(BdDReference_TOCSY) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] + names(BdDReference_TOCSY) <- enc2utf8(names(BdDReference_TOCSY)) + names(BdDReference_TOCSY) <- str_replace_all(names(BdDReference_TOCSY), "\u00e9", "e") + + for (k in seq_len(length(BdDReference_TOCSY))) { + peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_TOCSY[[k]][, 2], ppm.dim2 = BdDReference_TOCSY[[k]][, 1], + BdDReference_TOCSY[[k]][, 3:ncol(BdDReference_TOCSY[[k]])]) + BdDReference_TOCSY[[k]] <- peakforestSpectra_df + } + } else { + stop("No TOCSY spectra correspond to requested pH and/or magnetic field", call. = FALSE) + } + rm(peakforestSpectra) + rm(peakforestSpectra_df) } -if (argLs[[2]]=='no' & argLs[[3]]=='no' & argLs[[4]]=='no' & argLs[[5]]=='no' & argLs[[6]]=='no') - stop("No chosen sequence", call.=FALSE) +if (argLs$cosy_2dsequences == "no" & argLs$hmbc_2dsequences == "no" & argLs$hsqc_2dsequences == "no" & argLs$jres_2dsequences == "no" & argLs$tocsy_2dsequences == "no") + stop("No chosen sequence. You have to choose at least 1 sequence", call. = FALSE) # User database @@ -158,66 +241,60 @@ print(argLs) ## ANNOTATION -st0=Sys.time() -pdf(AnnotationGraph,onefile=TRUE) -annotationMelange <- annotationRmn2DGlobale(fileToAnnotate, tolPpm1=tolPpm1, tolPpm2HJRes=tolPpm2HJRes, - tolPpm2C=tolPpm2C, cosy=cosy, hmbc=hmbc, hsqc=hsqc, - jres=jres, tocsy=tocsy, seuil=seuil, unicite=unicite) +st0 <- Sys.time() +pdf(AnnotationGraph, onefile = TRUE) +annotationMelange <- annotationRmn2DGlobale(fileToAnnotate, tolPpm1 = tolPpm1, tolPpm2HJRes = tolPpm2HJRes, + tolPpm2C = tolPpm2C, cosy = cosy, hmbc = hmbc, hsqc = hsqc, + jres = jres, tocsy = tocsy, seuil = seuil, unicite = unicite) dev.off() -if (cosy==1) -{ - write.table(annotationMelange$COSY$liste_resultat, file=argLs[["annotationCOSY"]], quote=FALSE, - row.names=FALSE,sep="\t") +if (cosy == 1) { + write.table(annotationMelange$COSY$liste_resultat, file = argLs[["annotationCOSY"]], quote = FALSE, + row.names = FALSE, sep = "\t") if (nrow(annotationMelange$COSY$listing_ppm_commun) != 0) - write.table(annotationMelange$COSY$listing_ppm_commun, file=argLs[["ppmCommunCOSY"]], quote=FALSE, - row.names=FALSE,sep="\t") + write.table(annotationMelange$COSY$listing_ppm_commun, file = argLs[["ppmCommunCOSY"]], quote = FALSE, + row.names = FALSE, sep = "\t") } -if (hmbc==1) -{ - write.table(annotationMelange$HMBC$liste_resultat, file=argLs[["annotationHMBC"]], quote=FALSE, - row.names=FALSE,sep="\t") +if (hmbc == 1) { + write.table(annotationMelange$HMBC$liste_resultat, file = argLs[["annotationHMBC"]], quote = FALSE, + row.names = FALSE, sep = "\t") if (nrow(annotationMelange$HMBC$listing_ppm_commun) != 0) - write.table(annotationMelange$HMBC$listing_ppm_commun, file=argLs[["ppmCommunHMBC"]], quote=FALSE, - row.names=FALSE,sep="\t") + write.table(annotationMelange$HMBC$listing_ppm_commun, file = argLs[["ppmCommunHMBC"]], quote = FALSE, + row.names = FALSE, sep = "\t") } -if (hsqc==1) -{ - write.table(annotationMelange$HSQC$liste_resultat, file=argLs[["annotationHSQC"]], quote=FALSE, - row.names=FALSE,sep="\t") +if (hsqc == 1) { + write.table(annotationMelange$HSQC$liste_resultat, file = argLs[["annotationHSQC"]], quote = FALSE, + row.names = FALSE, sep = "\t") if (nrow(annotationMelange$HSQC$listing_ppm_commun) != 0) - write.table(annotationMelange$HSQC$listing_ppm_commun, file=argLs[["ppmCommunHSQC"]], quote=FALSE, - row.names=FALSE,sep="\t") + write.table(annotationMelange$HSQC$listing_ppm_commun, file = argLs[["ppmCommunHSQC"]], quote = FALSE, + row.names = FALSE, sep = "\t") } -if (jres==1) -{ - write.table(annotationMelange$JRES$liste_resultat, file=argLs[["annotationJRES"]], quote=FALSE, - row.names=FALSE,sep="\t") +if (jres == 1) { + write.table(annotationMelange$JRES$liste_resultat, file = argLs[["annotationJRES"]], quote = FALSE, + row.names = FALSE, sep = "\t") if (nrow(annotationMelange$JRES$listing_ppm_commun) != 0) - write.table(annotationMelange$JRES$listing_ppm_commun, file=argLs[["ppmCommunJRES"]], quote=FALSE, - row.names=FALSE,sep="\t") + write.table(annotationMelange$JRES$listing_ppm_commun, file = argLs[["ppmCommunJRES"]], quote = FALSE, + row.names = FALSE, sep = "\t") } -if (tocsy==1) -{ - write.table(annotationMelange$TOCSY$liste_resultat, file=argLs[["annotationTOCSY"]], quote=FALSE, - row.names=FALSE,sep="\t") +if (tocsy == 1) { + write.table(annotationMelange$TOCSY$liste_resultat, file = argLs[["annotationTOCSY"]], quote = FALSE, + row.names = FALSE, sep = "\t") if (nrow(annotationMelange$TOCSY$listing_ppm_commun) != 0) - write.table(annotationMelange$TOCSY$listing_ppm_commun, file=argLs[["ppmCommunTOCSY"]], quote=FALSE, - row.names=FALSE,sep="\t") + write.table(annotationMelange$TOCSY$listing_ppm_commun, file = argLs[["ppmCommunTOCSY"]], quote = FALSE, + row.names = FALSE, sep = "\t") } ## Combinaison de sequences -if (cosy + jres + hmbc + hsqc + tocsy > 1) -{ - write.table(annotationMelange$combination, file=argLs[["annotationCombination"]], quote=FALSE, - row.names=FALSE,sep="\t") +if (cosy + jres + hmbc + hsqc + tocsy > 1) { + write.table(annotationMelange$combination, file = argLs[["annotationCombination"]], quote = FALSE, + row.names = FALSE, sep = "\t") } -st1=Sys.time() -print(st1-st0) +st1 <- Sys.time() +print(st1 - st0) ## Ending ##--------