Mercurial > repos > lain > xseekerpreparator
changeset 19:2937e72e5891 draft
" master branch Updating"
| author | lain | 
|---|---|
| date | Tue, 18 Oct 2022 12:57:28 +0000 | 
| parents | 2c7e7fd1f740 | 
| children | ce94e7a141bb | 
| files | README.md XSeekerPreparator.R XSeekerPreparator.xml data/models.R format_versionning.MD galaxy/tools/LC-MSMS/XSeekerPreparator.R galaxy/tools/LC-MSMS/XSeekerPreparator.xml test/test.sh | 
| diffstat | 8 files changed, 1157 insertions(+), 1272 deletions(-) [+] | 
line wrap: on
 line diff
--- a/README.md Tue Feb 01 18:09:11 2022 +0000 +++ b/README.md Tue Oct 18 12:57:28 2022 +0000 @@ -96,7 +96,7 @@ -------- - **@name**: XSeekerPreparator - - **@version**: 1.2.4 + - **@version**: 1.3.0 - **@authors**: Lain Pavot - **@date creation**: 15/09/2020
--- a/XSeekerPreparator.R Tue Feb 01 18:09:11 2022 +0000 +++ b/XSeekerPreparator.R Tue Oct 18 12:57:28 2022 +0000 @@ -1,17 +1,27 @@ -TOOL_NAME <- "XSeekerPreparator" -VERSION <- "1.2.4" +assign("TOOL_NAME", "XSeekerPreparator", envir = globalenv()) +lockBinding("TOOL_NAME", globalenv()) +assign("VERSION", "1.3.0", envir = globalenv()) +lockBinding("VERSION", globalenv()) +assign("DEBUG_FAST", FALSE, envir = globalenv()) +lockBinding("DEBUG_FAST", globalenv()) +assign("DEBUG_FAST_IGNORE_SLOW_OP", DEBUG_FAST, envir = globalenv()) +lockBinding("DEBUG_FAST_IGNORE_SLOW_OP", globalenv()) +assign("PROCESS_SMOL_BATCH", DEBUG_FAST, envir = globalenv()) +lockBinding("PROCESS_SMOL_BATCH", globalenv()) +assign("FAST_FEATURE_RATIO", 10, envir = globalenv()) +lockBinding("FAST_FEATURE_RATIO", globalenv()) +assign("OUTPUT_SPECIFIC_TOOL", "XSeeker_Galaxy", envir = globalenv()) +lockBinding("OUTPUT_SPECIFIC_TOOL", globalenv()) -DEBUG_FAST <- FALSE -DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST -PROCESS_SMOL_BATCH <- DEBUG_FAST -FAST_FEATURE_RATIO <- 10 - -OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy" - -ENRICHED_RDATA_VERSION <- paste("1.2.4", OUTPUT_SPECIFIC_TOOL, sep="-") -ENRICHED_RDATA_DOC <- sprintf(" +assign( + "ENRICHED_RDATA_VERSION", + paste(VERSION, OUTPUT_SPECIFIC_TOOL, sep = "-"), + envir = globalenv() +) +lockBinding("ENRICHED_RDATA_VERSION", globalenv()) +assign("ENRICHED_RDATA_DOC", sprintf(" Welcome to the enriched <Version %s> of the output of CAMERA/xcms. This doc was generated by the tool: %s - Version %s To show the different variables contained in this rdata, type: @@ -41,7 +51,11 @@ - enriched_rdata_version: - Description: A flag created by that tool to tell which version of this tool has enriched the rdata. - - Retrieval method: enriched_rdata_version <- sprintf(\"%s\", ENRICHED_RDATA_VERSION) + - Retrieval method: + enriched_rdata_version <- sprintf( + \"%s\", + ENRICHED_RDATA_VERSION + ) - enriched_rdata_doc: - Description: Contains the documentation string. @@ -81,7 +95,10 @@ - polarity: - Description: Those are the polarity values from the original mzxml file, extracted using xcms 2. - - Retrieval method: as.character(xcms::xcmsRaw('original_file.mzxml')@polarity[[1]]) + - Retrieval method: + as.character(xcms::xcmsRaw( + 'original_file.mzxml' + )@polarity[[1]]) - xcms version: 2.0 Data taken from incoming rdata @@ -103,12 +120,18 @@ process_params <- list() for (list_name in names(rdata_file$listOFlistArguments)) { param_list <- list() - for (param_name in names(rdata_file$listOFlistArguments[[list_name]])) { - param_list[[param_name]] <- rdata_file$listOFlistArguments[[list_name]][[param_name]] + for (param_name in names( + rdata_file$listOFlistArguments[[list_name]] + )) { + param_list[[param_name]] <- rdata_file$listOFlistArguments[[ + list_name + ]][[param_name]] } process_params[[length(process_params)+1]] <- param_list } -", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION) +", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION), +envir = globalenv()) +lockBinding("ENRICHED_RDATA_DOC", globalenv()) @@ -120,21 +143,21 @@ } ## galaxy mangles the "@" to a "__at__" if (substr(path, 1, 9) == "git__at__") { - path <- sub("^git__at__", "git@", path, perl=TRUE) + path <- sub("^git__at__", "git@", path, perl = TRUE) } if ( substr(path, 1, 4) == "git@" - || substr(path, length(path)-4, 4) == ".git" + || substr(path, length(path) - 4, 4) == ".git" ) { - return (get_models_from_git(path)) + return(get_models_from_git(path)) } if (substr(path, 1, 4) == "http") { - return (get_models_from_url(path)) + return(get_models_from_url(path)) } - return (source(path)$value) + return(source(path)$value) } -get_models_from_git <- function (url, target_file="models.R", rm=TRUE) { +get_models_from_git <- function(url, target_file = "models.R", rm = TRUE) { tmp <- tempdir() message(sprintf("Cloning %s", url)) system2("git", c("clone", url, tmp)) @@ -142,12 +165,12 @@ if (!is.null(result)) { models <- source(result)$value if (rm) { - unlink(tmp, recursive=TRUE) + unlink(tmp, recursive = TRUE) } - return (models) + return(models) } if (rm) { - unlink(tmp, recursive=TRUE) + unlink(tmp, recursive = TRUE) } stop(sprintf( "Could not find any file named \"%s\" in this repo", @@ -155,19 +178,19 @@ )) } -get_models_from_url <- function (url, target_file="models.R", rm=TRUE) { +get_models_from_url <- function(url, target_file = "models.R", rm = TRUE) { tmp <- tempdir() message(sprintf("Downloading %s", url)) result <- file.path(tmp, target_file) - if (download.file(url, destfile=result) == 0) { + if (download.file(url, destfile = result) == 0) { models <- source(result)$value if (rm) { - unlink(tmp, recursive=TRUE) + unlink(tmp, recursive = TRUE) } - return (models) + return(models) } if (rm) { - unlink(tmp, recursive=TRUE) + unlink(tmp, recursive = TRUE) } stop("Could not download any file at this adress.") } @@ -178,162 +201,162 @@ if (is.dir(file)) { result <- search_tree(file.path(path, file), target) if (!is.null(result)) { - return (result) + return(result) } } else if (tolower(file) == target) { - return (file.path(path, file)) + return(file.path(path, file)) } } - return (NULL) + return(NULL) } create_database <- function(orm) { - orm$recreate_database(no_exists=FALSE) + orm$recreate_database(no_exists = FALSE) set_database_version(orm, "created") } insert_adducts <- function(orm) { message("Creating adducts...") adducts <- list( - list("[M-H2O-H]-",1,-1,-48.992020312000001069,1,0,0.5,"H0","H1O3"), - list("[M-H-Cl+O]-",1,-1,-19.981214542000000022,2,0,0.5,"O1","H1Cl1"), - list("[M-Cl+O]-",1,-1,-18.973389510000000512,3,0,0.5,"O1","Cl1"), - list("[M-3H]3-",1,-3,-3.0218293560000000219,4,0,1.0,"H0","H3"), - list("[2M-3H]3-",2,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"), - list("[3M-3H]3-",3,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"), - list("[M-2H]2-",1,-2,-2.0145529039999998666,5,0,1.0,"H0","H2"), - list("[2M-2H]2-",2,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"), - list("[3M-2H]2-",3,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"), - list("[M-H]-",1,-1,-1.0072764519999999333,6,1,1.0,"H0","H1"), - list("[2M-H]-",2,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"), - list("[3M-H]-",3,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"), - list("[M]+",1,1,-0.00054858000000000000945,7,1,1.0,"H0","H0"), - list("[M]-",1,-1,0.00054858000000000000945,8,1,1.0,"H0","H0"), - list("[M+H]+",1,1,1.0072764519999999333,9,1,1.0,"H1","H0"), - list("[2M+H]+",2,1,1.0072764519999999333,9,0,0.5,"H1","H0"), - list("[3M+H]+",3,1,1.0072764519999999333,9,0,0.25,"H1","H0"), - list("[M+2H]2+",1,2,2.0145529039999998666,10,0,0.75,"H2","H0"), - list("[2M+2H]2+",2,2,2.0145529039999998666,10,0,0.5,"H2","H0"), - list("[3M+2H]2+",3,2,2.0145529039999998666,10,0,0.25,"H2","H0"), - list("[M+3H]3+",1,3,3.0218293560000000219,11,0,0.75,"H3","H0"), - list("[2M+3H]3+",2,3,3.0218293560000000219,11,0,0.5,"H3","H0"), - list("[3M+3H]3+",3,3,3.0218293560000000219,11,0,0.25,"H3","H0"), - list("[M-2H+NH4]-",1,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"), - list("[2M-2H+NH4]-",2,-1,16.019272654000001665,12,0,0.0,"N1H4","H2"), - list("[3M-2H+NH4]-",3,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"), - list("[M+NH4]+",1,1,18.033825558000000199,13,1,1.0,"N1H4","H0"), - list("[2M+NH4]+",2,1,18.033825558000000199,13,0,0.5,"N1H4","H0"), - list("[3M+NH4]+",3,1,18.033825558000000199,13,0,0.25,"N1H4","H0"), - list("[M+H+NH4]2+",1,2,19.041102009999999467,14,0,0.5,"N1H5","H0"), - list("[2M+H+NH4]2+",2,2,19.041102009999999467,14,0,0.5,"N1H5","H0"), - list("[3M+H+NH4]2+",3,2,19.041102009999999467,14,0,0.25,"N1H5","H0"), - list("[M+Na-2H]-",1,-1,20.974668176000001551,15,0,0.75,"Na1","H2"), - list("[2M-2H+Na]-",2,-1,20.974668176000001551,15,0,0.25,"Na1","H2"), - list("[3M-2H+Na]-",3,-1,20.974668176000001551,15,0,0.25,"Na1","H2"), - list("[M+Na]+",1,1,22.989221080000000086,16,1,1.0,"Na1","H0"), - list("[2M+Na]+",2,1,22.989221080000000086,16,0,0.5,"Na1","H0"), - list("[3M+Na]+",3,1,22.989221080000000086,16,0,0.25,"Na1","H0"), - list("[M+H+Na]2+",1,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"), - list("[2M+H+Na]2+",2,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"), - list("[3M+H+Na]2+",3,2,23.996497531999999353,17,0,0.25,"Na1H1","H0"), - list("[M+2H+Na]3+",1,3,25.003773983999998619,18,0,0.25,"H2Na1","H0"), - list("[M+CH3OH+H]+",1,1,33.033491200000000276,19,0,0.25,"C1O1H5","H0"), - list("[M-H+Cl]2-",1,-2,33.962124838000001148,20,0,1.0,"Cl1","H1"), - list("[2M-H+Cl]2-",2,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"), - list("[3M-H+Cl]2-",3,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"), - list("[M+Cl]-",1,-1,34.969401290000000416,21,1,1.0,"Cl1","H0"), - list("[2M+Cl]-",2,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"), - list("[3M+Cl]-",3,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"), - list("[M+K-2H]-",1,-1,36.948605415999999479,22,0,0.5,"K1","H2"), - list("[2M-2H+K]-",2,-1,36.948605415999999479,22,0,0.0,"K1","H2"), - list("[3M-2H+K]-",3,-1,36.948605415999999479,22,0,0.0,"K1","H2"), - list("[M+K]+",1,1,38.963158319999998013,23,1,1.0,"K1","H0"), - list("[2M+K]+",2,1,38.963158319999998013,23,0,0.5,"K1","H0"), - list("[3M+K]+",3,1,38.963158319999998013,23,0,0.25,"K1","H0"), - list("[M+H+K]2+",1,2,39.970434771999997281,24,0,0.5,"K1H1","H0"), - list("[2M+H+K]2+",2,2,39.970434771999997281,24,0,0.5,"K1H1","H0"), - list("[3M+H+K]2+",3,2,39.970434771999997281,24,0,0.25,"K1H1","H0"), - list("[M+ACN+H]+",1,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"), - list("[2M+ACN+H]+",2,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"), - list("[M+2Na-H]+",1,1,44.971165708000000902,26,0,0.5,"Na2","H1"), - list("[2M+2Na-H]+",2,1,44.971165708000000902,26,0,0.25,"Na2","H1"), - list("[3M+2Na-H]+",3,1,44.971165708000000902,26,0,0.25,"Na2","H1"), - list("[2M+FA-H]-",2,-1,44.998202851999998586,27,0,0.25,"C1O2H2","H1"), - list("[M+FA-H]-",1,-1,44.998202851999998586,27,0,0.5,"C1O2H2","H1"), - list("[M+2Na]2+",1,2,45.978442160000000172,28,0,0.5,"Na2","H0"), - list("[2M+2Na]2+",2,2,45.978442160000000172,28,0,0.5,"Na2","H0"), - list("[3M+2Na]2+",3,2,45.978442160000000172,28,0,0.25,"Na2","H0"), - list("[M+H+2Na]3+",1,3,46.985718611999999438,29,0,0.25,"H1Na2","H0"), - list("[M+H+FA]+",1,1,47.012755755999997122,30,0,0.25,"C1O2H3","H0"), - list("[M+Hac-H]-",1,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"), - list("[2M+Hac-H]-",2,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"), - list("[M+IsoProp+H]+",1,1,61.064791327999998317,32,0,0.25,"C3H9O1","H0"), - list("[M+Na+K]2+",1,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"), - list("[2M+Na+K]2+",2,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"), - list("[3M+Na+K]2+",3,2,61.9523793999999981,33,0,0.25,"Na1K1","H0"), - list("[M+NO3]-",1,-1,61.988366450000000895,34,0,0.5,"N1O3","H0"), - list("[M+ACN+Na]+",1,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"), - list("[2M+ACN+Na]+",2,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"), - list("[M+NH4+FA]+",1,1,64.039304861999994502,36,0,0.25,"N1C1O2H6","H0"), - list("[M-2H+Na+FA]-",1,-1,66.980147479999999405,37,0,0.5,"NaC1O2H2","H2"), - list("[M+3Na]3+",1,3,68.967663239999993153,38,0,0.25,"Na3","H0"), - list("[M+Na+FA]+",1,1,68.99470038399999794,39,0,0.25,"Na1C1O2H2","H0"), - list("[M+2Cl]2-",1,-2,69.938802580000000832,40,0,1.0,"Cl2","H0"), - list("[2M+2Cl]2-",2,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"), - list("[3M+2Cl]2-",3,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"), - list("[M+2K-H]+",1,1,76.919040187999996758,41,0,0.5,"K2","H1"), - list("[2M+2K-H]+",2,1,76.919040187999996758,41,0,0.25,"K2","H1"), - list("[3M+2K-H]+",3,1,76.919040187999996758,41,0,0.25,"K2","H1"), - list("[M+2K]2+",1,2,77.926316639999996028,42,0,0.5,"K2","H0"), - list("[2M+2K]2+",2,2,77.926316639999996028,42,0,0.5,"K2","H0"), - list("[3M+2K]2+",3,2,77.926316639999996028,42,0,0.25,"K2","H0"), - list("[M+Br]-",1,-1,78.918886479999997619,43,1,1.0,"Br1","H0"), - list("[M+Cl+FA]-",1,-1,80.974880593999998268,44,0,0.5,"Cl1C1O2H2","H0"), - list("[M+AcNa-H]-",1,-1,80.995797543999998426,45,0,0.25,"C2H3Na1O2","H1"), - list("[M+2ACN+2H]2+",1,2,84.067651115999993292,46,0,0.25,"C4H8N2","H0"), - list("[M+K+FA]+",1,1,84.968637623999995868,47,0,0.25,"K1C1O2H2","H0"), - list("[M+Cl+Na+FA-H]-",1,-1,102.95682522200000619,48,0,0.5,"Cl1Na1C1O2H2","H1"), - list("[2M+3H2O+2H]+",2,1,104.03153939599999944,49,0,0.25,"H8O6","H0"), - list("[M+TFA-H]-",1,-1,112.98558742000000165,50,0,0.5,"C2F3O2H1","H1"), - list("[M+H+TFA]+",1,1,115.00014032400000019,51,0,0.25,"C2F3O2H2","H0"), - list("[M+3ACN+2H]2+",1,2,125.09420022199999778,52,0,0.25,"C6H11N3","H0"), - list("[M+NH4+TFA]+",1,1,132.02668943000000468,53,0,0.25,"N1C2F3O2H5","H0"), - list("[M+Na+TFA]+",1,1,136.98208495200000811,54,0,0.25,"Na1C2F3O2H1","H0"), - list("[M+Cl+TFA]-",1,-1,148.96226516199999423,55,0,0.5,"Cl1C2F3O2H1","H0"), - list("[M+K+TFA]+",1,1,152.95602219200000604,56,0,0.25,"K1C2F3O2H1","H0") + list("[M-H2O-H]-", 1, -1, -48.992020312000001069, 1, 0, 0.5, "H0", "H1O3"), + list("[M-H-Cl+O]-", 1, -1, -19.981214542000000022, 2, 0, 0.5, "O1", "H1Cl1"), + list("[M-Cl+O]-", 1, -1, -18.973389510000000512, 3, 0, 0.5, "O1", "Cl1"), + list("[M-3H]3-", 1, -3, -3.0218293560000000219, 4, 0, 1.0, "H0", "H3"), + list("[2M-3H]3-", 2, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"), + list("[3M-3H]3-", 3, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"), + list("[M-2H]2-", 1, -2, -2.0145529039999998666, 5, 0, 1.0, "H0", "H2"), + list("[2M-2H]2-", 2, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"), + list("[3M-2H]2-", 3, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"), + list("[M-H]-", 1, -1, -1.0072764519999999333, 6, 1, 1.0, "H0", "H1"), + list("[2M-H]-", 2, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"), + list("[3M-H]-", 3, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"), + list("[M]+", 1, 1, -0.00054858000000000000945, 7, 1, 1.0, "H0", "H0"), + list("[M]-", 1, -1, 0.00054858000000000000945, 8, 1, 1.0, "H0", "H0"), + list("[M+H]+", 1, 1, 1.0072764519999999333, 9, 1, 1.0, "H1", "H0"), + list("[2M+H]+", 2, 1, 1.0072764519999999333, 9, 0, 0.5, "H1", "H0"), + list("[3M+H]+", 3, 1, 1.0072764519999999333, 9, 0, 0.25, "H1", "H0"), + list("[M+2H]2+", 1, 2, 2.0145529039999998666, 10, 0, 0.75, "H2", "H0"), + list("[2M+2H]2+", 2, 2, 2.0145529039999998666, 10, 0, 0.5, "H2", "H0"), + list("[3M+2H]2+", 3, 2, 2.0145529039999998666, 10, 0, 0.25, "H2", "H0"), + list("[M+3H]3+", 1, 3, 3.0218293560000000219, 11, 0, 0.75, "H3", "H0"), + list("[2M+3H]3+", 2, 3, 3.0218293560000000219, 11, 0, 0.5, "H3", "H0"), + list("[3M+3H]3+", 3, 3, 3.0218293560000000219, 11, 0, 0.25, "H3", "H0"), + list("[M-2H+NH4]-", 1, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"), + list("[2M-2H+NH4]-", 2, -1, 16.019272654000001665, 12, 0, 0.0, "N1H4", "H2"), + list("[3M-2H+NH4]-", 3, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"), + list("[M+NH4]+", 1, 1, 18.033825558000000199, 13, 1, 1.0, "N1H4", "H0"), + list("[2M+NH4]+", 2, 1, 18.033825558000000199, 13, 0, 0.5, "N1H4", "H0"), + list("[3M+NH4]+", 3, 1, 18.033825558000000199, 13, 0, 0.25, "N1H4", "H0"), + list("[M+H+NH4]2+", 1, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"), + list("[2M+H+NH4]2+", 2, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"), + list("[3M+H+NH4]2+", 3, 2, 19.041102009999999467, 14, 0, 0.25, "N1H5", "H0"), + list("[M+Na-2H]-", 1, -1, 20.974668176000001551, 15, 0, 0.75, "Na1", "H2"), + list("[2M-2H+Na]-", 2, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"), + list("[3M-2H+Na]-", 3, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"), + list("[M+Na]+", 1, 1, 22.989221080000000086, 16, 1, 1.0, "Na1", "H0"), + list("[2M+Na]+", 2, 1, 22.989221080000000086, 16, 0, 0.5, "Na1", "H0"), + list("[3M+Na]+", 3, 1, 22.989221080000000086, 16, 0, 0.25, "Na1", "H0"), + list("[M+H+Na]2+", 1, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"), + list("[2M+H+Na]2+", 2, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"), + list("[3M+H+Na]2+", 3, 2, 23.996497531999999353, 17, 0, 0.25, "Na1H1", "H0"), + list("[M+2H+Na]3+", 1, 3, 25.003773983999998619, 18, 0, 0.25, "H2Na1", "H0"), + list("[M+CH3OH+H]+", 1, 1, 33.033491200000000276, 19, 0, 0.25, "C1O1H5", "H0"), + list("[M-H+Cl]2-", 1, -2, 33.962124838000001148, 20, 0, 1.0, "Cl1", "H1"), + list("[2M-H+Cl]2-", 2, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"), + list("[3M-H+Cl]2-", 3, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"), + list("[M+Cl]-", 1, -1, 34.969401290000000416, 21, 1, 1.0, "Cl1", "H0"), + list("[2M+Cl]-", 2, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"), + list("[3M+Cl]-", 3, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"), + list("[M+K-2H]-", 1, -1, 36.948605415999999479, 22, 0, 0.5, "K1", "H2"), + list("[2M-2H+K]-", 2, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"), + list("[3M-2H+K]-", 3, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"), + list("[M+K]+", 1, 1, 38.963158319999998013, 23, 1, 1.0, "K1", "H0"), + list("[2M+K]+", 2, 1, 38.963158319999998013, 23, 0, 0.5, "K1", "H0"), + list("[3M+K]+", 3, 1, 38.963158319999998013, 23, 0, 0.25, "K1", "H0"), + list("[M+H+K]2+", 1, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"), + list("[2M+H+K]2+", 2, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"), + list("[3M+H+K]2+", 3, 2, 39.970434771999997281, 24, 0, 0.25, "K1H1", "H0"), + list("[M+ACN+H]+", 1, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"), + list("[2M+ACN+H]+", 2, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"), + list("[M+2Na-H]+", 1, 1, 44.971165708000000902, 26, 0, 0.5, "Na2", "H1"), + list("[2M+2Na-H]+", 2, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"), + list("[3M+2Na-H]+", 3, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"), + list("[2M+FA-H]-", 2, -1, 44.998202851999998586, 27, 0, 0.25, "C1O2H2", "H1"), + list("[M+FA-H]-", 1, -1, 44.998202851999998586, 27, 0, 0.5, "C1O2H2", "H1"), + list("[M+2Na]2+", 1, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"), + list("[2M+2Na]2+", 2, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"), + list("[3M+2Na]2+", 3, 2, 45.978442160000000172, 28, 0, 0.25, "Na2", "H0"), + list("[M+H+2Na]3+", 1, 3, 46.985718611999999438, 29, 0, 0.25, "H1Na2", "H0"), + list("[M+H+FA]+", 1, 1, 47.012755755999997122, 30, 0, 0.25, "C1O2H3", "H0"), + list("[M+Hac-H]-", 1, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"), + list("[2M+Hac-H]-", 2, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"), + list("[M+IsoProp+H]+", 1, 1, 61.064791327999998317, 32, 0, 0.25, "C3H9O1", "H0"), + list("[M+Na+K]2+", 1, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"), + list("[2M+Na+K]2+", 2, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"), + list("[3M+Na+K]2+", 3, 2, 61.9523793999999981, 33, 0, 0.25, "Na1K1", "H0"), + list("[M+NO3]-", 1, -1, 61.988366450000000895, 34, 0, 0.5, "N1O3", "H0"), + list("[M+ACN+Na]+", 1, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"), + list("[2M+ACN+Na]+", 2, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"), + list("[M+NH4+FA]+", 1, 1, 64.039304861999994502, 36, 0, 0.25, "N1C1O2H6", "H0"), + list("[M-2H+Na+FA]-", 1, -1, 66.980147479999999405, 37, 0, 0.5, "NaC1O2H2", "H2"), + list("[M+3Na]3+", 1, 3, 68.967663239999993153, 38, 0, 0.25, "Na3", "H0"), + list("[M+Na+FA]+", 1, 1, 68.99470038399999794, 39, 0, 0.25, "Na1C1O2H2", "H0"), + list("[M+2Cl]2-", 1, -2, 69.938802580000000832, 40, 0, 1.0, "Cl2", "H0"), + list("[2M+2Cl]2-", 2, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"), + list("[3M+2Cl]2-", 3, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"), + list("[M+2K-H]+", 1, 1, 76.919040187999996758, 41, 0, 0.5, "K2", "H1"), + list("[2M+2K-H]+", 2, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"), + list("[3M+2K-H]+", 3, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"), + list("[M+2K]2+", 1, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"), + list("[2M+2K]2+", 2, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"), + list("[3M+2K]2+", 3, 2, 77.926316639999996028, 42, 0, 0.25, "K2", "H0"), + list("[M+Br]-", 1, -1, 78.918886479999997619, 43, 1, 1.0, "Br1", "H0"), + list("[M+Cl+FA]-", 1, -1, 80.974880593999998268, 44, 0, 0.5, "Cl1C1O2H2", "H0"), + list("[M+AcNa-H]-", 1, -1, 80.995797543999998426, 45, 0, 0.25, "C2H3Na1O2", "H1"), + list("[M+2ACN+2H]2+", 1, 2, 84.067651115999993292, 46, 0, 0.25, "C4H8N2", "H0"), + list("[M+K+FA]+", 1, 1, 84.968637623999995868, 47, 0, 0.25, "K1C1O2H2", "H0"), + list("[M+Cl+Na+FA-H]-", 1, -1, 102.95682522200000619, 48, 0, 0.5, "Cl1Na1C1O2H2", "H1"), + list("[2M+3H2O+2H]+", 2, 1, 104.03153939599999944, 49, 0, 0.25, "H8O6", "H0"), + list("[M+TFA-H]-", 1, -1, 112.98558742000000165, 50, 0, 0.5, "C2F3O2H1", "H1"), + list("[M+H+TFA]+", 1, 1, 115.00014032400000019, 51, 0, 0.25, "C2F3O2H2", "H0"), + list("[M+3ACN+2H]2+", 1, 2, 125.09420022199999778, 52, 0, 0.25, "C6H11N3", "H0"), + list("[M+NH4+TFA]+", 1, 1, 132.02668943000000468, 53, 0, 0.25, "N1C2F3O2H5", "H0"), + list("[M+Na+TFA]+", 1, 1, 136.98208495200000811, 54, 0, 0.25, "Na1C2F3O2H1", "H0"), + list("[M+Cl+TFA]-", 1, -1, 148.96226516199999423, 55, 0, 0.5, "Cl1C2F3O2H1", "H0"), + list("[M+K+TFA]+", 1, 1, 152.95602219200000604, 56, 0, 0.25, "K1C2F3O2H1","H0") ) dummy_adduct <- orm$adduct() for (adduct in adducts) { i <- 0 - dummy_adduct$set_name(adduct[[i <- i+1]]) - dummy_adduct$set_multi(adduct[[i <- i+1]]) - dummy_adduct$set_charge(adduct[[i <- i+1]]) - dummy_adduct$set_mass(adduct[[i <- i+1]]) - dummy_adduct$set_oidscore(adduct[[i <- i+1]]) - dummy_adduct$set_quasi(adduct[[i <- i+1]]) - dummy_adduct$set_ips(adduct[[i <- i+1]]) - dummy_adduct$set_formula_add(adduct[[i <- i+1]]) - dummy_adduct$set_formula_ded(adduct[[i <- i+1]]) + dummy_adduct$set_name(adduct[[i <- i + 1]]) + dummy_adduct$set_multi(adduct[[i <- i + 1]]) + dummy_adduct$set_charge(adduct[[i <- i + 1]]) + dummy_adduct$set_mass(adduct[[i <- i + 1]]) + dummy_adduct$set_oidscore(adduct[[i <- i + 1]]) + dummy_adduct$set_quasi(adduct[[i <- i + 1]]) + dummy_adduct$set_ips(adduct[[i <- i + 1]]) + dummy_adduct$set_formula_add(adduct[[i <- i + 1]]) + dummy_adduct$set_formula_ded(adduct[[i <- i + 1]]) invisible(dummy_adduct$save()) - dummy_adduct$clear(unset_id=TRUE) + dummy_adduct$clear(unset_id = TRUE) } message("Adducts created") } -insert_base_data <- function(orm, path, archetype=FALSE) { +insert_base_data <- function(orm, path, archetype = FALSE) { if (archetype) { ## not implemented yet - return () + return() } base_data <- readLines(path) - for (sql in strsplit(paste(base_data, collapse=" "), ";")[[1]]) { + for (sql in strsplit(paste(base_data, collapse = " "), ";")[[1]]) { orm$execute(sql) } set_database_version(orm, "enriched") } insert_compounds <- function(orm, compounds_path) { - compounds <- read.csv(file=compounds_path, sep="\t") + compounds <- read.csv(file = compounds_path, sep = "\t") if (is.null(compounds <- translate_compounds(compounds))) { stop("Could not find asked compound's attributes in csv file.") } @@ -344,18 +367,21 @@ dummy_compound$set_name(compounds[i, "name"]) dummy_compound$set_common_name(compounds[i, "common_name"]) dummy_compound$set_formula(compounds[i, "formula"]) - compound_list[[length(compound_list)+1]] <- as.list( + compound_list[[length(compound_list) + 1]] <- as.list( dummy_compound, c("mz", "name", "common_name", "formula") ) - dummy_compound$clear(unset_id=TRUE) + dummy_compound$clear(unset_id = TRUE) } - invisible(dummy_compound$save(bulk=compound_list)) + invisible(dummy_compound$save(bulk = compound_list)) } translate_compounds <- function(compounds) { recognized_headers <- list( - c("HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", "MetName", "ChemFormula", "INChIkey") + c( + "HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", + "MetName", "ChemFormula", "INChIkey" + ) ) header_translators <- list( hmdb_header_translator @@ -363,23 +389,23 @@ for (index in seq_along(recognized_headers)) { headers <- recognized_headers[[index]] if (identical(colnames(compounds), headers)) { - return (header_translators[[index]](compounds)) + return(header_translators[[index]](compounds)) } } if (is.null(translator <- guess_translator(colnames(compounds)))) { - return (NULL) + return(NULL) } - return (csv_header_translator(translator, compounds)) + return(csv_header_translator(translator, compounds)) } guess_translator <- function(header) { result <- list( - # HMDB_ID=NULL, - mz=NULL, - name=NULL, - common_name=NULL, - formula=NULL, - # inchi_key=NULL + # HMDB_ID = NULL, + mz = NULL, + name = NULL, + common_name = NULL, + formula = NULL, + # inchi_key = NULL ) asked_cols <- names(result) for (asked_col in asked_cols) { @@ -395,39 +421,39 @@ } } if (any(mapply(is.null, result))) { - return (NULL) + return(NULL) } - return (result) + return(result) } hmdb_header_translator <- function(compounds) { - return (csv_header_translator( + return(csv_header_translator( list( - HMDB_ID="HMDB_ID", - mz="MzBank", - name="MetName", - common_name="MetName", - formula="ChemFormula", - inchi_key="INChIkey" + HMDB_ID = "HMDB_ID", + mz = "MzBank", + name = "MetName", + common_name = "MetName", + formula = "ChemFormula", + inchi_key = "INChIkey" ), compounds )) } csv_header_translator <- function(translation_table, csv) { header_names <- names(translation_table) - result <- data.frame(1:nrow(csv)) + result <- data.frame(seq_len(nrow(csv))) for (i in seq_along(header_names)) { result[, header_names[[i]]] <- csv[, translation_table[[i]]] } result[, "mz"] <- as.numeric(result[, "mz"]) - return (result) + return(result) } set_database_version <- function(orm, version) { orm$set_tag( version, - tag_name="database_version", - tag_table_name="XSeeker_tagging_table" + tag_name = "database_version", + tag_table_name = "XSeeker_tagging_table" ) } @@ -444,15 +470,16 @@ error <- tryCatch({ process_sample_list( orm, rdata, samples, - show_percent=show_percent + show_percent = show_percent, + file_grouping_var = options$class ) NULL - }, error=function(e) { + }, error = function(e) { message(e) e }) if (!is.null(mzml_tmp_dir)) { - unlink(mzml_tmp_dir, recursive=TRUE) + unlink(mzml_tmp_dir, recursive = TRUE) } if (!is.null(error)) { stop(error) @@ -463,23 +490,49 @@ if (is.null(rdata$singlefile)) { message("Extracting mxml files") tmp <- tempdir() - rdata$singlefile <- utils::unzip(rdata$zipfile, exdir=tmp) - names(rdata$singlefile) <- tools::file_path_sans_ext(basename(rdata$singlefile)) + rdata$singlefile <- utils::unzip(rdata$zipfile, exdir = tmp) + names(rdata$singlefile) <- tools::file_path_sans_ext( + basename(rdata$singlefile) + ) message("Extracted") - return (tmp) + return(tmp) } else { - message(sprintf("Not a zip file, loading files directly from path: %s", paste(rdata$singlefile, collapse=" ; "))) + message(sprintf( + "Not a zip file, loading files directly from path: %s", + paste(rdata$singlefile, collapse = " ; ") + )) } - return (NULL) + return(NULL) } -process_sample_list <- function(orm, radta, sample_names, show_percent) { - file_grouping_var <- find_grouping_var(rdata$variableMetadata) +process_sample_list <- function( + orm, + rdata, + sample_names, + show_percent, + file_grouping_var = NULL +) { + if (is.null(file_grouping_var)) { + file_grouping_var <- find_grouping_var(rdata$variableMetadata) + if (is.null(file_grouping_var)) { + stop("Malformed variableMetada.") + } + } + tryCatch({ + headers <- colnames(rdata$variableMetadata) + file_grouping_var <- headers[[as.numeric(file_grouping_var)]] + }, error = function(e) NULL) + if ( + is.null(file_grouping_var) + || !(file_grouping_var %in% colnames(rdata$variableMetadata)) + ) { + stop(sprintf( + "Could not find grouping variable %s in var meta file.", + file_grouping_var + )) + } message("Processing samples.") message(sprintf("File grouping variable: %s", file_grouping_var)) - if(is.null(file_grouping_var)) { - stop("Malformed variableMetada.") - } context <- new.env() context$samples <- list() @@ -492,7 +545,6 @@ process_params <- list() if (is.null(process_arg_list)) { - histories <- list() for (history in xcms_set@.processHistory) { if ( class(history@param) == "CentWaveParam" @@ -500,19 +552,23 @@ ) { params <- history@param process_params <- list(list( - xfunction="annotatediff", - ppm=params@ppm, - peakwidth=sprintf("%s - %s", params@peakwidth[[1]], params@peakwidth[[2]]), - snthresh=params@snthresh, - prefilterStep=params@prefilter[[1]], - prefilterLevel=params@prefilter[[2]], - mzdiff=params@mzdiff, - fitgauss=params@fitgauss, - noise=params@noise, - mzCenterFun=params@mzCenterFun, - integrate=params@integrate, - firstBaselineCheck=params@firstBaselineCheck, - snthreshIsoROIs=!identical(params@roiScales, numeric(0)) + xfunction = "annotatediff", + ppm = params@ppm, + peakwidth = sprintf( + "%s - %s", + params@peakwidth[[1]], + params@peakwidth[[2]] + ), + snthresh = params@snthresh, + prefilterStep = params@prefilter[[1]], + prefilterLevel = params@prefilter[[2]], + mzdiff = params@mzdiff, + fitgauss = params@fitgauss, + noise = params@noise, + mzCenterFun = params@mzCenterFun, + integrate = params@integrate, + firstBaselineCheck = params@firstBaselineCheck, + snthreshIsoROIs = !identical(params@roiScales, numeric(0)) )) break } @@ -521,9 +577,11 @@ for (list_name in names(process_arg_list)) { param_list <- list() for (param_name in names(process_arg_list[[list_name]])) { - param_list[[param_name]] <- process_arg_list[[list_name]][[param_name]] + param_list[[param_name]] <- process_arg_list[[ + list_name + ]][[param_name]] } - process_params[[length(process_params)+1]] <- param_list + process_params[[length(process_params) + 1]] <- param_list } } @@ -531,6 +589,17 @@ indices <- as.numeric(unique(var_meta[, file_grouping_var])) + if (any(is.null(names(singlefile)[indices]))) { + stop(sprintf( + paste( + "Indices defined by grouping variable %s are not all present", + "in singlefile names (%s).\nCannot continue. Indices: %s" + ), + file_grouping_var, + paste(names(singlefile), collapse = ", "), + paste(indices, collapse = ", ") + )) + } smol_xcms_set <- orm$smol_xcms_set() mz_tab_info <- new.env() g <- xcms::groups(xcms_set) @@ -538,10 +607,16 @@ mz_tab_info$dataset_path <- xcms::filepaths(xcms_set) mz_tab_info$sampnames <- xcms::sampnames(xcms_set) mz_tab_info$sampclass <- xcms::sampclass(xcms_set) - mz_tab_info$rtmed <- g[,"rtmed"] - mz_tab_info$mzmed <- g[,"mzmed"] - mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(xcms_set, value="into") - blogified <- blob::blob(fst::compress_fst(serialize(mz_tab_info, NULL), compression=100)) + mz_tab_info$rtmed <- g[, "rtmed"] + mz_tab_info$mzmed <- g[, "mzmed"] + mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval( + xcms_set, + value = "into" + ) + blogified <- blob::blob(fst::compress_fst( + serialize(mz_tab_info, NULL), + compression = 100 + )) rm(mz_tab_info) invisible(smol_xcms_set$set_raw(blogified)$save()) @@ -582,9 +657,9 @@ env$enriched_rdata_doc <- ENRICHED_RDATA_DOC sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id) - rm (env) + rm(env) context$samples[no] <- sample$get_id() - rm (sample) + rm(sample) } context$clusters <- list() context$show_percent <- show_percent @@ -597,28 +672,32 @@ message("Features enrichment") complete_features(orm, clusters, show_percent) message("Features enrichment done.") - return (NULL) + return(NULL) } find_grouping_var <- function(var_meta) { - known_colnames = c( + known_colnames <- c( "name", "namecustom", "mz", "mzmin", "mzmax", - "rt", "rtmin", "rtmax", "npeaks", "isotopes", "adduct", "pcgroup" + "rt", "rtmin", "rtmax", "npeaks", "isotopes", "adduct", + "pcgroup", "ms_level" ) col_names <- colnames(var_meta) - classes = list() + classes <- list() for (name in col_names) { if (!(name %in% known_colnames)) { - classes[[length(classes)+1]] = name + classes[[length(classes) + 1]] <- name } } if (length(classes) > 1) { - stop(sprintf("Only one class expected in the variable metadata. Found %d .", length(classes))) + stop(sprintf( + "Only one class expected in the variable metadata. Found %d .", + length(classes) + )) } if (length(classes) == 0) { stop("Could not find any class column in your variableMetadata.") } - return (classes[[1]]) + return(classes[[1]]) } add_sample_to_database <- function(orm, env, context, smol_xcms_set_id) { @@ -629,12 +708,15 @@ $set_path(env$dataset_path) $set_kind("enriched_rdata") $set_polarity( - if (is.null(env$polarity) || identical(env$polarity, character(0))) "" + if ( + is.null(env$polarity) + || identical(env$polarity, character(0)) + ) "" else env$polarity ) $set_raw(blob::blob(fst::compress_fst( serialize(env, NULL), - compression=100 + compression = 100 ))) ) sample[["smol_xcms_set_id"]] <- smol_xcms_set_id @@ -642,7 +724,7 @@ sample <- sample$save() load_process_params(orm, sample, env$process_params) message(sprintf("Sample %s inserted.", env$sample_name)) - return (sample) + return(sample) } @@ -660,14 +742,14 @@ next_pc_group, next_align_group )) message("Extracting features done.") - return (NULL) + return(NULL) } get_next_id <- function(models, attribute) { if ((id <- models$max(attribute)) == Inf || id == -Inf) { - return (0) + return(0) } - return (id) + return(id) } create_features <- function( @@ -676,7 +758,7 @@ next_pc_group, next_align_group ) { field_names <- as.list(names(orm$feature()$fields__)) - field_names[field_names=="id"] <- NULL + field_names[field_names == "id"] <- NULL features <- list() dummy_feature <- orm$feature() @@ -688,13 +770,13 @@ rows <- seq_len(nrow(var_meta)) if (PROCESS_SMOL_BATCH) { - rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))] + rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))] } cluster_row <- list() for (row in rows) { if (show_percent && (row / total) * 100 > percent) { percent <- percent + 1 - message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) + message("\r", sprintf("\r%d %%", percent), appendLF = FALSE) } dummy_feature$set_featureID(next_feature_id) @@ -710,27 +792,44 @@ peak_list <- context$peaks[context$groupidx[[row]], ] if (! ("matrix" %in% class(peak_list))) { - peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list))) + peak_list <- matrix( + peak_list, + nrow = 1, + ncol = length(peak_list), + dimnames = list(c(), names(peak_list)) + ) } clusterID <- as.character(clusterID) if (is.null(context$central_feature[[clusterID]])) { int_o <- extract_peak_var(peak_list, "into") context$central_feature[[clusterID]] <- ( - peak_list[peak_list[, "into"] == int_o,]["sample"] + peak_list[peak_list[, "into"] == int_o, ]["sample"] ) } if (!DEBUG_FAST_IGNORE_SLOW_OP) { - sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE] - if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { - if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { + central_feature <- context$central_feature[[clusterID]] + sample_peak_list <- peak_list[ + as.integer(peak_list[, "sample"]) == central_feature, + , + drop = FALSE + ] + if ( + !identical(sample_peak_list, numeric(0)) + && !is.null(nrow(sample_peak_list)) + && nrow(sample_peak_list) != 0 + ) { + int_o <- extract_peak_var(sample_peak_list, "into") + if (!is.na(int_o)) { dummy_feature$set_int_o(int_o) } - if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { + int_b <- extract_peak_var(sample_peak_list, "intb") + if (!is.na(int_b)) { dummy_feature$set_int_b(int_b) } - if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) { + max_o <- extract_peak_var(sample_peak_list, "maxo") + if (!is.na(max_o)) { dummy_feature$set_max_o(max_o) } } @@ -744,13 +843,13 @@ next_align_group ) next_align_group <- next_align_group + 1 - features[[length(features)+1]] <- as.list(dummy_feature, field_names) + features[[length(features) + 1]] <- as.list(dummy_feature, field_names) dummy_feature$clear() } rm(var_meta) message("") message("Saving features") - invisible(dummy_feature$save(bulk=features)) + invisible(dummy_feature$save(bulk = features)) ## We link manually clusters to the sample they're in. link_cache <- list() @@ -758,24 +857,25 @@ sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"]) for (sample_id in context$samples[sample_nos]) { cluster_id <- cluster_row[[row]]$get_id() - if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) { + id <- paste(sample_id, cluster_id, sep = ";") + if (is.null(link_cache[[id]])) { link_cache[[id]] <- 1 orm$cluster_sample( - sample_id=sample_id, - cluster_id=cluster_id + sample_id = sample_id, + cluster_id = cluster_id )$save() } } } message("Saved.") - return (context$clusters) + return(context$clusters) } -extract_peak_var <- function(peak_list, var_name, selector=max) { +extract_peak_var <- function(peak_list, var_name, selector = max) { value <- peak_list[, var_name] names(value) <- NULL - return (selector(value)) + return(selector(value)) } set_feature_fields_from_var_meta <- function(feature, var_meta) { @@ -800,24 +900,24 @@ if (!is.null(isotopes <- var_meta[["isotopes"]]) && !is.na(isotopes)) { feature$set_iso(isotopes) } - return (feature) + return(feature) } extract_iso <- function(weird_data) { if (grepl("^\\[\\d+\\]", weird_data)[[1]]) { - return (sub("^\\[\\d+\\]", "", weird_data, perl=TRUE)) + return(sub("^\\[\\d+\\]", "", weird_data, perl = TRUE)) } - return (weird_data) + return(weird_data) } -extract_clusterID <- function(weird_data, next_cluster_id){ +extract_clusterID <- function(weird_data, next_cluster_id) { if (grepl("^\\[\\d+\\]", weird_data)[[1]]) { clusterID <- stringr::str_extract(weird_data, "^\\[\\d+\\]") clusterID <- as.numeric(stringr::str_extract(clusterID, "\\d+")) } else { clusterID <- 0 } - return (clusterID + next_cluster_id) + return(clusterID + next_cluster_id) } create_associated_cluster <- function( @@ -831,24 +931,29 @@ adduct_name <- as.character(curent_var_meta[["adduct"]]) annotation <- curent_var_meta[["isotopes"]] cluster <- context$clusters[[clusterID]] <- orm$cluster( - pc_group=pcgroup + next_pc_group, + pc_group = pcgroup + next_pc_group, # adduct=adduct, - align_group=next_align_group, + align_group = next_align_group, # curent_group=curent_group, - clusterID=context$clusterID, - annotation=annotation + clusterID = context$clusterID, + annotation = annotation ) if (is.null(adduct <- context$adducts[[adduct_name]])) { - context$adducts[[adduct_name]] <- orm$adduct()$load_by(name=adduct_name)$first() + context$adducts[[adduct_name]] <- orm$adduct()$load_by( + name = adduct_name + )$first() if (is.null(adduct <- context$adducts[[adduct_name]])) { - adduct <- context$adducts[[adduct_name]] <- orm$adduct(name=adduct_name, charge=0) + adduct <- context$adducts[[adduct_name]] <- orm$adduct( + name = adduct_name, + charge = 0 + ) adduct$save() } } cluster$set_adduct(adduct) - ## Crappy hack to assign sample id to cluster without loading the sample. - ## Samples are too big (their sample$env) and slows the process, and eat all the menory - ## so we dont't want to load them. + ## Crappy hack to assign sample id to cluster without loading the + ## sample. Samples are too big (their sample$env) and slows the + ## process, and eat all the menory so we dont't want to load them. cluster[["sample_id"]] <- main_sample_id cluster$modified__[["sample_id"]] <- main_sample_id } else { @@ -858,7 +963,7 @@ } cluster$save() feature$set_cluster(cluster) - return (cluster) + return(cluster) } complete_features <- function(orm, clusters, show_percent) { @@ -866,20 +971,25 @@ percent <- -1 i <- 0 for (cluster in clusters) { - i <- i+1 + i <- i + 1 if (show_percent && (i / total) * 100 > percent) { percent <- percent + 1 - message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) + message("\r", sprintf("\r%d %%", percent), appendLF = FALSE) } - features <- orm$feature()$load_by(cluster_id=cluster$get_id()) + features <- orm$feature()$load_by(cluster_id = cluster$get_id()) if (features$any()) { if (!is.null(rt <- features$mean("rt"))) { cluster$set_mean_rt(rt)$save() } features_df <- as.data.frame(features) - central_feature <- features_df[grepl("^\\[M\\]", features_df[, "iso"]), ] + central_feature <- features_df[ + grepl("^\\[M\\]", features_df[, "iso"]), + ] central_feature_into <- central_feature[["int_o"]] - if (!identical(central_feature_into, numeric(0)) && central_feature_into != 0) { + if ( + !identical(central_feature_into, numeric(0)) + && central_feature_into != 0 + ) { for (feature in as.vector(features)) { feature$set_abundance( feature$get_int_o() / central_feature_into * 100 @@ -888,7 +998,7 @@ } } } - return (NULL) + return(NULL) } load_process_params <- function(orm, sample, params) { @@ -900,19 +1010,23 @@ load_process_params_peak_picking(orm, sample, param_list) } } - return (sample) + return(sample) } -load_process_params_peak_picking <- function(orm, sample, peak_picking_params) { - return (add_sample_process_parameters( - params=peak_picking_params, - params_translation=list( - ppm="ppm", - maxcharge="maxCharge", - maxiso="maxIso" +load_process_params_peak_picking <- function( + orm, + sample, + peak_picking_params +) { + return(add_sample_process_parameters( + params = peak_picking_params, + params_translation = list( + ppm = "ppm", + maxcharge = "maxCharge", + maxiso = "maxIso" ), - param_model_generator=orm$peak_picking_parameters, - sample_param_setter=sample$set_peak_picking_parameters + param_model_generator = orm$peak_picking_parameters, + sample_param_setter = sample$set_peak_picking_parameters )) } @@ -937,7 +1051,7 @@ params_model <- do.call(param_model_generator, model_params) params_model$save() } - return (sample_param_setter(params_model)$save()) + return(sample_param_setter(params_model)$save()) } @@ -946,56 +1060,67 @@ option_list <- list( optparse::make_option( c("-v", "--version"), - action="store_true", - help="Display this tool's version and exits" + action = "store_true", + help = "Display this tool's version and exits" ), optparse::make_option( c("-i", "--input"), - type="character", - help="The rdata path to import in XSeeker" + type = "character", + help = "The rdata path to import in XSeeker" ), optparse::make_option( c("-s", "--samples"), - type="character", - help="Samples to visualise in XSeeker" + type = "character", + help = "Samples to visualise in XSeeker" ), optparse::make_option( c("-B", "--archetype"), - type="character", - help="The name of the base database" + type = "character", + help = "The name of the base database" ), optparse::make_option( c("-b", "--database"), - type="character", - help="The base database's path" + type = "character", + help = "The base database's path" ), optparse::make_option( c("-c", "--compounds-csv"), - type="character", - help="The csv containing compounds" + type = "character", + help = "The csv containing compounds" ), optparse::make_option( c("-m", "--models"), - type="character", - help="The path or url (must begin with http[s]:// or git@) to the database's models" + type = "character", + help = paste( + "The path or url (must begin with http[s]:// or git@) to", + "the database's models" + ) ), optparse::make_option( + c("-k", "--class"), + type = "character", + help = "The name of the column containing the classes" + ), + optparse::make_option( c("-o", "--output"), - type="character", - help="The path where to output sqlite" + type = "character", + help = "The path where to output sqlite" ), optparse::make_option( c("-P", "--not-show-percent"), - action="store_true", - help="Flag not to show the percents", - default=FALSE + action = "store_true", + help = "Flag not to show the percents", + default = FALSE ) ) -options(error=function(){traceback(3)}) +options(error = function(){traceback(3)}) -parser <- OptionParser(usage="%prog [options] file", option_list=option_list) -args <- parse_args(parser, positional_arguments=0) +parser <- OptionParser( + usage = "%prog [options] file", + option_list = option_list +) +args <- parse_args(parser, positional_arguments = 0) err_code <- 0 @@ -1006,8 +1131,8 @@ models <- get_models(args$options$models) orm <- DBModelR::ORM( - connection_params=list(dbname=args$options$output), - dbms="SQLite" + connection_params = list(dbname=args$options$output), + dbms = "SQLite" ) invisible(orm$models(models)) @@ -1023,7 +1148,7 @@ message(sprintf("Base data inserted using %s.", args$options$database)) if (!is.null(args$options$archetype)) { - insert_base_data(orm, args$options$archetype, archetype=TRUE) + insert_base_data(orm, args$options$archetype, archetype = TRUE) } if (!is.null(args$options$`compounds-csv`)) { insert_compounds(orm, args$options$`compounds-csv`) @@ -1038,6 +1163,4 @@ process_rdata(orm, rdata, args$options) -quit(status=err_code) - - +quit(status = err_code)
--- a/XSeekerPreparator.xml Tue Feb 01 18:09:11 2022 +0000 +++ b/XSeekerPreparator.xml Tue Oct 18 12:57:28 2022 +0000 @@ -1,14 +1,9 @@ -<tool id="xseeker_preparator" - name="XSeeker Preparator" - version="1.2.4" -> - <description>Prepare RData file from CAMERA to be visualized in XSeeker</description> - +<tool id="xseeker_preparator" name="XSeeker Preparator" version="1.3.0"> + <description>prepares RData file from XCMS+CAMERA for XSeeker</description> <edam_operations> <edam_operation>operation_1812</edam_operation> <edam_operation>operation_0335</edam_operation> </edam_operations> - <requirements> <requirement type="package" >bioconductor-xcms</requirement> <requirement type="package" version="1.48.0">bioconductor-camera</requirement> @@ -36,58 +31,47 @@ --> </requirements> <stdio> - <exit_code range="1" level="warning" description="Selected samples have no data associated to them." /> - <exit_code range="2" level="warning" description="Some samples have no data associated to them." /> - </stdio> - <version_command> Rscript '$__tool_directory__/XSeekerPreparator.R' -v </version_command> - <command> <![CDATA[ Rscript '$__tool_directory__/XSeekerPreparator.R' - -P - --input '$input' --output '$output' - #if $samples.selected --samples '${",".join($samples.selected)}' #end if - #if $database.archetypes --archetype '${",".join($database.archetypes)}' #end if - #if $database.base.kind == "tabular" --compounds-csv '${database.base.tabular}' #else if $database.base.kind == "sql" --database '${database.base.sql}' #end if - #if $database.models.kind == "default" --models '${base_config}' #else --models '${database.models.url}' #end if - + #if $class_column + --class '${class_column}' + #end if ]]> - </command> - <inputs> <param name="input" @@ -98,6 +82,18 @@ format="rdata" > </param> + <param + name="class_column" + type="text" + value="" + label="Column class name" + help=" + The name of the column containing the classes - + leave empty to let xsprep guess + " + optional="true" + > + </param> <section name="samples" title="Samples Options" expanded="false"> <param name="selected" @@ -109,7 +105,6 @@ > </param> </section> - <section name="database" title="Database Options" expanded="false"> <param name="archetypes" @@ -120,7 +115,6 @@ <option value="G" selected="true">General</option> <option value="H">Halogenates</option> </param> - <conditional name="base"> <param name="kind" type="select" label="File containing compound's type"> <option value="none" selected="true">None (deafult)</option> @@ -150,7 +144,6 @@ </param> </when> </conditional> - <conditional name="models"> <param name="kind" type="select" label="How is the database's model defined"> <option value="default" selected="true">Default (regular XSeeker Database)</option> @@ -166,13 +159,10 @@ </conditional> </section> </inputs> - - <outputs> <data format="sqlite" name="output" /> <!-- <data format="xseeker.sqlite" name="output" /> --> </outputs> - <configfiles> <configfile name="base_config"> tryCatch({ @@ -180,111 +170,113 @@ }, error=function(e) { stop("Please, install DBModelR before you source this file.") }) - list( - adduct=DBModelR::ModelDefinition( - table="adduct", - fields=list( - name="TEXT", - mass="FLOAT", - charge="INTEGER", - multi="INTEGER", - formula_add="TEXT", - formula_ded="TEXT", - sign="TEXT", - oidscore="INTEGER", - quasi="INTEGER", - ips="FLOAT" + adduct = DBModelR::ModelDefinition( + table = "adduct", + fields = list( + name = "TEXT", + mass = "FLOAT", + charge = "INTEGER", + multi = "INTEGER", + formula_add = "TEXT", + formula_ded = "TEXT", + sign = "TEXT", + oidscore = "INTEGER", + quasi = "INTEGER", + ips = "FLOAT" ) ), - cluster=DBModelR::ModelDefinition( - table="cluster", - fields=list( - clusterID="INTEGER", - formula="TEXT", - annotation="TEXT", - coeff="FLOAT", - r_squared="FLOAT", - charge="INTEGER", - mean_rt="FLOAT", - score="FLOAT", - deviation="FLOAT", - status="TEXT", - curent_group="INTEGER", - pc_group="INTEGER", - align_group="INTEGER", - xcms_group="INTEGER" + cluster = DBModelR::ModelDefinition( + table = "cluster", + fields = list( + clusterID = "INTEGER", + formula = "TEXT", + annotation = "TEXT", + coeff = "FLOAT", + r_squared = "FLOAT", + charge = "INTEGER", + mean_rt = "FLOAT", + score = "FLOAT", + deviation = "FLOAT", + status = "TEXT", + # adduct = "TEXT", + curent_group = "INTEGER", + pc_group = "INTEGER", + align_group = "INTEGER", + xcms_group = "INTEGER" ), - one=list("sample", "compound", "adduct") + one = list("compound", "adduct"), + many = list("sample") ), - compound=DBModelR::ModelDefinition( - table="compound", - fields=list( - name="TEXT", - common_name="TEXT", - formula="TEXT", - charge="INTEGER", - date="TEXT", - mz="FLOAT" + compound = DBModelR::ModelDefinition( + table = "compound", + fields = list( + name = "TEXT", + common_name = "TEXT", + formula = "TEXT", + charge = "INTEGER", + date = "TEXT", + mz = "FLOAT" ) ), - feature=DBModelR::ModelDefinition( - table="feature", - fields=list( - featureID="INTEGER", - mz="FLOAT", - mz_min="FLOAT", - mz_max="FLOAT", - rt="FLOAT", - rt_min="FLOAT", - rt_max="FLOAT", - int_o="FLOAT", - int_b="FLOAT", - max_o="FLOAT", - iso="TEXT", - abundance="FLOAT" + feature = DBModelR::ModelDefinition( + table = "feature", + fields = list( + featureID = "INTEGER", + mz = "FLOAT", + mz_min = "FLOAT", + mz_max = "FLOAT", + rt = "FLOAT", + rt_min = "FLOAT", + rt_max = "FLOAT", + int_o = "FLOAT", + int_b = "FLOAT", + max_o = "FLOAT", + iso = "TEXT", + abundance = "FLOAT" ), - one=list("cluster") + one = list("cluster"), + many = list("sample") ), - instrument=DBModelR::ModelDefinition( - table="instrument", - fields=list( - model="TEXT", - manufacturer="TEXT", - analyzer="TEXT", - detector_type="TEXT", - ion_source="TEXT" + instrument = DBModelR::ModelDefinition( + table = "instrument", + fields = list( + model = "TEXT", + manufacturer = "TEXT", + analyzer = "TEXT", + detector_type = "TEXT", + ion_source = "TEXT" ) ), - instrument_config=DBModelR::ModelDefinition( - table="instrument_config", - fields=list( - resolution="TEXT", - agc_target="TEXT", - maximum_IT="TEXT", - number_of_scan_range="TEXT", - scan_range="TEXT", - version="TEXT" + instrument_config = DBModelR::ModelDefinition( + table = "instrument_config", + fields = list( + resolution = "TEXT", + agc_target = "TEXT", + maximum_IT = "TEXT", + number_of_scan_range = "TEXT", + scan_range = "TEXT", + version = "TEXT" ) ), - project=DBModelR::ModelDefinition( - table="project", - fields=list( - name="TEXT", - comment="TEXT" + project = DBModelR::ModelDefinition( + table = "project", + fields = list( + name = "TEXT", + comment = "TEXT" ), - one=list("sample") + one = list("sample") ), - sample=DBModelR::ModelDefinition( - table="sample", - fields=list( - name="TEXT", - path="TEXT", - polarity="TEXT", - kind="TEXT", ## rdata or mxml or enriched_rdata - raw="BLOB" + sample = DBModelR::ModelDefinition( + table = "sample", + fields = list( + name = "TEXT", + path = "TEXT", + polarity = "TEXT", + kind = "TEXT", ## rdata or mxml or enriched_rdata + raw = "BLOB" ), - one=list( + one = list( "peak_picking_parameters", "pairing_parameters", "alignmenmt_parameters", @@ -295,56 +287,56 @@ "smol_xcms_set" ) ), - smol_xcms_set=DBModelR::ModelDefinition( - table="smol_xcms_set", - fields=list( - raw="BLOB" + smol_xcms_set = DBModelR::ModelDefinition( + table = "smol_xcms_set", + fields = list( + raw = "BLOB" ) ), - software=DBModelR::ModelDefinition( - table="software", - fields=list( - name="TEXT", - version="TEXT" + software = DBModelR::ModelDefinition( + table = "software", + fields = list( + name = "TEXT", + version = "TEXT" ) ), - peak_picking_parameters=DBModelR::ModelDefinition( - table="peak_picking_parameters", - fields=list( - ppm="FLOAT", - peakwidth="TEXT", - snthresh="TEXT", - prefilterStep="TEXT", - prefilterLevel="TEXT", - mzdiff="TEXT", - fitgauss="TEXT", - noise="TEXT", - mzCenterFun="TEXT", - integrate="INTEGER", - firstBaselineCheck="TEXT", - snthreshIsoROIs="TEXT", - maxCharge="INTEGER", - maxIso="INTEGER", - mzIntervalExtension="TEXT" + peak_picking_parameters = DBModelR::ModelDefinition( + table = "peak_picking_parameters", + fields = list( + ppm = "FLOAT", + peakwidth = "TEXT", + snthresh = "TEXT", + prefilterStep = "TEXT", + prefilterLevel = "TEXT", + mzdiff = "TEXT", + fitgauss = "TEXT", + noise = "TEXT", + mzCenterFun = "TEXT", + integrate = "INTEGER", + firstBaselineCheck = "TEXT", + snthreshIsoROIs = "TEXT", + maxCharge = "INTEGER", + maxIso = "INTEGER", + mzIntervalExtension = "TEXT" ) ), - alignmenmt_parameters=DBModelR::ModelDefinition( - table="alignmenmt_parameters", - fields=list( - binSize="TEXT", - centerSample="TEXT", - response="TEXT", - distFun="TEXT", - gapInit="TEXT", - gapExtend="TEXT", - factorDiag="TEXT", - factorGap="TEXT", - localAlignment="INTEGER", - initPenalty="TEXT", - bw="TEXT", - minFraction="TEXT", - minSamples="TEXT", - maxFeatures="TEXT" + alignmenmt_parameters = DBModelR::ModelDefinition( + table = "alignmenmt_parameters", + fields = list( + binSize = "TEXT", + centerSample = "TEXT", + response = "TEXT", + distFun = "TEXT", + gapInit = "TEXT", + gapExtend = "TEXT", + factorDiag = "TEXT", + factorGap = "TEXT", + localAlignment = "INTEGER", + initPenalty = "TEXT", + bw = "TEXT", + minFraction = "TEXT", + minSamples = "TEXT", + maxFeatures = "TEXT" ) ) )
--- a/data/models.R Tue Feb 01 18:09:11 2022 +0000 +++ b/data/models.R Tue Oct 18 12:57:28 2022 +0000 @@ -1,117 +1,117 @@ tryCatch({ - DBModelR::ModelDefinition(table="yui", fields=list(yui="INTEGER")) -}, error=function(e) { + DBModelR::ModelDefinition(table = "yui", fields = list(yui = "INTEGER")) +}, error = function(e) { stop("Please, install DBModelR before you source this file.") }) list( - adduct=DBModelR::ModelDefinition( - table="adduct", - fields=list( - name="TEXT", - mass="FLOAT", - charge="INTEGER", - multi="INTEGER", - formula_add="TEXT", - formula_ded="TEXT", - sign="TEXT", - oidscore="INTEGER", - quasi="INTEGER", - ips="FLOAT" + adduct = DBModelR::ModelDefinition( + table = "adduct", + fields = list( + name = "TEXT", + mass = "FLOAT", + charge = "INTEGER", + multi = "INTEGER", + formula_add = "TEXT", + formula_ded = "TEXT", + sign = "TEXT", + oidscore = "INTEGER", + quasi = "INTEGER", + ips = "FLOAT" ) ), - cluster=DBModelR::ModelDefinition( - table="cluster", - fields=list( - clusterID="INTEGER", - formula="TEXT", - annotation="TEXT", - coeff="FLOAT", - r_squared="FLOAT", - charge="INTEGER", - mean_rt="FLOAT", - score="FLOAT", - deviation="FLOAT", - status="TEXT", - # adduct="TEXT", - curent_group="INTEGER", - pc_group="INTEGER", - align_group="INTEGER", - xcms_group="INTEGER" + cluster = DBModelR::ModelDefinition( + table = "cluster", + fields = list( + clusterID = "INTEGER", + formula = "TEXT", + annotation = "TEXT", + coeff = "FLOAT", + r_squared = "FLOAT", + charge = "INTEGER", + mean_rt = "FLOAT", + score = "FLOAT", + deviation = "FLOAT", + status = "TEXT", + # adduct = "TEXT", + curent_group = "INTEGER", + pc_group = "INTEGER", + align_group = "INTEGER", + xcms_group = "INTEGER" ), - one=list("compound", "adduct"), - many=list("sample") + one = list("compound", "adduct"), + many = list("sample") ), - compound=DBModelR::ModelDefinition( - table="compound", - fields=list( - name="TEXT", - common_name="TEXT", - formula="TEXT", - charge="INTEGER", - date="TEXT", - mz="FLOAT" + compound = DBModelR::ModelDefinition( + table = "compound", + fields = list( + name = "TEXT", + common_name = "TEXT", + formula = "TEXT", + charge = "INTEGER", + date = "TEXT", + mz = "FLOAT" ) ), - feature=DBModelR::ModelDefinition( - table="feature", - fields=list( - featureID="INTEGER", - mz="FLOAT", - mz_min="FLOAT", - mz_max="FLOAT", - rt="FLOAT", - rt_min="FLOAT", - rt_max="FLOAT", - int_o="FLOAT", - int_b="FLOAT", - max_o="FLOAT", - iso="TEXT", - abundance="FLOAT" + feature = DBModelR::ModelDefinition( + table = "feature", + fields = list( + featureID = "INTEGER", + mz = "FLOAT", + mz_min = "FLOAT", + mz_max = "FLOAT", + rt = "FLOAT", + rt_min = "FLOAT", + rt_max = "FLOAT", + int_o = "FLOAT", + int_b = "FLOAT", + max_o = "FLOAT", + iso = "TEXT", + abundance = "FLOAT" ), - one=list("cluster"), - many=list("sample") + one = list("cluster"), + many = list("sample") ), - instrument=DBModelR::ModelDefinition( - table="instrument", - fields=list( - model="TEXT", - manufacturer="TEXT", - analyzer="TEXT", - detector_type="TEXT", - ion_source="TEXT" + instrument = DBModelR::ModelDefinition( + table = "instrument", + fields = list( + model = "TEXT", + manufacturer = "TEXT", + analyzer = "TEXT", + detector_type = "TEXT", + ion_source = "TEXT" ) ), - instrument_config=DBModelR::ModelDefinition( - table="instrument_config", - fields=list( - resolution="TEXT", - agc_target="TEXT", - maximum_IT="TEXT", - number_of_scan_range="TEXT", - scan_range="TEXT", - version="TEXT" + instrument_config = DBModelR::ModelDefinition( + table = "instrument_config", + fields = list( + resolution = "TEXT", + agc_target = "TEXT", + maximum_IT = "TEXT", + number_of_scan_range = "TEXT", + scan_range = "TEXT", + version = "TEXT" ) ), - project=DBModelR::ModelDefinition( - table="project", - fields=list( - name="TEXT", - comment="TEXT" + project = DBModelR::ModelDefinition( + table = "project", + fields = list( + name = "TEXT", + comment = "TEXT" ), - one=list("sample") + one = list("sample") ), - sample=DBModelR::ModelDefinition( - table="sample", - fields=list( - name="TEXT", - path="TEXT", - polarity="TEXT", - kind="TEXT", ## rdata or mxml or enriched_rdata - raw="BLOB" + sample = DBModelR::ModelDefinition( + table = "sample", + fields = list( + name = "TEXT", + path = "TEXT", + polarity = "TEXT", + kind = "TEXT", ## rdata or mxml or enriched_rdata + raw = "BLOB" ), - one=list( + one = list( "peak_picking_parameters", "pairing_parameters", "alignmenmt_parameters", @@ -122,64 +122,56 @@ "smol_xcms_set" ) ), - smol_xcms_set=DBModelR::ModelDefinition( - table="smol_xcms_set", - fields=list( - raw="BLOB" + smol_xcms_set = DBModelR::ModelDefinition( + table = "smol_xcms_set", + fields = list( + raw = "BLOB" ) ), - software=DBModelR::ModelDefinition( - table="software", - fields=list( - name="TEXT", - version="TEXT" + software = DBModelR::ModelDefinition( + table = "software", + fields = list( + name = "TEXT", + version = "TEXT" ) ), - # camera_parameters=DBModelR::ModelDefinition( - # table="camera_parameters", - # fields=list() - # ), - # pairing_parameters=DBModelR::ModelDefinition( - # table="pairing_parameters", - # fields=list() - # ), - peak_picking_parameters=DBModelR::ModelDefinition( - table="peak_picking_parameters", - fields=list( - ppm="FLOAT", - peakwidth="TEXT", - snthresh="TEXT", - prefilterStep="TEXT", - prefilterLevel="TEXT", - mzdiff="TEXT", - fitgauss="TEXT", - noise="TEXT", - mzCenterFun="TEXT", - integrate="INTEGER", - firstBaselineCheck="TEXT", - snthreshIsoROIs="TEXT", - maxCharge="INTEGER", - maxIso="INTEGER", - mzIntervalExtension="TEXT" + peak_picking_parameters = DBModelR::ModelDefinition( + table = "peak_picking_parameters", + fields = list( + ppm = "FLOAT", + peakwidth = "TEXT", + snthresh = "TEXT", + prefilterStep = "TEXT", + prefilterLevel = "TEXT", + mzdiff = "TEXT", + fitgauss = "TEXT", + noise = "TEXT", + mzCenterFun = "TEXT", + integrate = "INTEGER", + firstBaselineCheck = "TEXT", + snthreshIsoROIs = "TEXT", + maxCharge = "INTEGER", + maxIso = "INTEGER", + mzIntervalExtension = "TEXT" ) ), - alignmenmt_parameters=DBModelR::ModelDefinition( - table="alignmenmt_parameters", - fields=list( - binSize="TEXT", - centerSample="TEXT", - response="TEXT", - distFun="TEXT", - gapInit="TEXT", - gapExtend="TEXT", - factorDiag="TEXT", - factorGap="TEXT", - localAlignment="INTEGER", - initPenalty="TEXT", - bw="TEXT", - minFraction="TEXT", - minSamples="TEXT", - maxFeatures="TEXT" + alignmenmt_parameters = DBModelR::ModelDefinition( + table = "alignmenmt_parameters", + fields = list( + binSize = "TEXT", + centerSample = "TEXT", + response = "TEXT", + distFun = "TEXT", + gapInit = "TEXT", + gapExtend = "TEXT", + factorDiag = "TEXT", + factorGap = "TEXT", + localAlignment = "INTEGER", + initPenalty = "TEXT", + bw = "TEXT", + minFraction = "TEXT", + minSamples = "TEXT", + maxFeatures = "TEXT" ) ) )
--- a/format_versionning.MD Tue Feb 01 18:09:11 2022 +0000 +++ b/format_versionning.MD Tue Oct 18 12:57:28 2022 +0000 @@ -10,6 +10,11 @@ modifications provided by the newest versions are on top of the file. +VERSION 1.3.0 +===== +add an option to use the provided column as a class + + VERSION 1.1.2 ===== add missing mz_tab_info$group_length field to produce mzTab
--- a/galaxy/tools/LC-MSMS/XSeekerPreparator.R Tue Feb 01 18:09:11 2022 +0000 +++ b/galaxy/tools/LC-MSMS/XSeekerPreparator.R Tue Oct 18 12:57:28 2022 +0000 @@ -1,17 +1,27 @@ -TOOL_NAME <- "XSeekerPreparator" -VERSION <- "1.2.4" +assign("TOOL_NAME", "XSeekerPreparator", envir = globalenv()) +lockBinding("TOOL_NAME", globalenv()) +assign("VERSION", "1.3.0", envir = globalenv()) +lockBinding("VERSION", globalenv()) +assign("DEBUG_FAST", FALSE, envir = globalenv()) +lockBinding("DEBUG_FAST", globalenv()) +assign("DEBUG_FAST_IGNORE_SLOW_OP", DEBUG_FAST, envir = globalenv()) +lockBinding("DEBUG_FAST_IGNORE_SLOW_OP", globalenv()) +assign("PROCESS_SMOL_BATCH", DEBUG_FAST, envir = globalenv()) +lockBinding("PROCESS_SMOL_BATCH", globalenv()) +assign("FAST_FEATURE_RATIO", 10, envir = globalenv()) +lockBinding("FAST_FEATURE_RATIO", globalenv()) +assign("OUTPUT_SPECIFIC_TOOL", "XSeeker_Galaxy", envir = globalenv()) +lockBinding("OUTPUT_SPECIFIC_TOOL", globalenv()) -DEBUG_FAST <- FALSE -DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST -PROCESS_SMOL_BATCH <- DEBUG_FAST -FAST_FEATURE_RATIO <- 10 - -OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy" - -ENRICHED_RDATA_VERSION <- paste("1.2.4", OUTPUT_SPECIFIC_TOOL, sep="-") -ENRICHED_RDATA_DOC <- sprintf(" +assign( + "ENRICHED_RDATA_VERSION", + paste(VERSION, OUTPUT_SPECIFIC_TOOL, sep = "-"), + envir = globalenv() +) +lockBinding("ENRICHED_RDATA_VERSION", globalenv()) +assign("ENRICHED_RDATA_DOC", sprintf(" Welcome to the enriched <Version %s> of the output of CAMERA/xcms. This doc was generated by the tool: %s - Version %s To show the different variables contained in this rdata, type: @@ -41,7 +51,11 @@ - enriched_rdata_version: - Description: A flag created by that tool to tell which version of this tool has enriched the rdata. - - Retrieval method: enriched_rdata_version <- sprintf(\"%s\", ENRICHED_RDATA_VERSION) + - Retrieval method: + enriched_rdata_version <- sprintf( + \"%s\", + ENRICHED_RDATA_VERSION + ) - enriched_rdata_doc: - Description: Contains the documentation string. @@ -81,7 +95,10 @@ - polarity: - Description: Those are the polarity values from the original mzxml file, extracted using xcms 2. - - Retrieval method: as.character(xcms::xcmsRaw('original_file.mzxml')@polarity[[1]]) + - Retrieval method: + as.character(xcms::xcmsRaw( + 'original_file.mzxml' + )@polarity[[1]]) - xcms version: 2.0 Data taken from incoming rdata @@ -103,12 +120,18 @@ process_params <- list() for (list_name in names(rdata_file$listOFlistArguments)) { param_list <- list() - for (param_name in names(rdata_file$listOFlistArguments[[list_name]])) { - param_list[[param_name]] <- rdata_file$listOFlistArguments[[list_name]][[param_name]] + for (param_name in names( + rdata_file$listOFlistArguments[[list_name]] + )) { + param_list[[param_name]] <- rdata_file$listOFlistArguments[[ + list_name + ]][[param_name]] } process_params[[length(process_params)+1]] <- param_list } -", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION) +", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION), +envir = globalenv()) +lockBinding("ENRICHED_RDATA_DOC", globalenv()) @@ -120,21 +143,21 @@ } ## galaxy mangles the "@" to a "__at__" if (substr(path, 1, 9) == "git__at__") { - path <- sub("^git__at__", "git@", path, perl=TRUE) + path <- sub("^git__at__", "git@", path, perl = TRUE) } if ( substr(path, 1, 4) == "git@" - || substr(path, length(path)-4, 4) == ".git" + || substr(path, length(path) - 4, 4) == ".git" ) { - return (get_models_from_git(path)) + return(get_models_from_git(path)) } if (substr(path, 1, 4) == "http") { - return (get_models_from_url(path)) + return(get_models_from_url(path)) } - return (source(path)$value) + return(source(path)$value) } -get_models_from_git <- function (url, target_file="models.R", rm=TRUE) { +get_models_from_git <- function(url, target_file = "models.R", rm = TRUE) { tmp <- tempdir() message(sprintf("Cloning %s", url)) system2("git", c("clone", url, tmp)) @@ -142,12 +165,12 @@ if (!is.null(result)) { models <- source(result)$value if (rm) { - unlink(tmp, recursive=TRUE) + unlink(tmp, recursive = TRUE) } - return (models) + return(models) } if (rm) { - unlink(tmp, recursive=TRUE) + unlink(tmp, recursive = TRUE) } stop(sprintf( "Could not find any file named \"%s\" in this repo", @@ -155,19 +178,19 @@ )) } -get_models_from_url <- function (url, target_file="models.R", rm=TRUE) { +get_models_from_url <- function(url, target_file = "models.R", rm = TRUE) { tmp <- tempdir() message(sprintf("Downloading %s", url)) result <- file.path(tmp, target_file) - if (download.file(url, destfile=result) == 0) { + if (download.file(url, destfile = result) == 0) { models <- source(result)$value if (rm) { - unlink(tmp, recursive=TRUE) + unlink(tmp, recursive = TRUE) } - return (models) + return(models) } if (rm) { - unlink(tmp, recursive=TRUE) + unlink(tmp, recursive = TRUE) } stop("Could not download any file at this adress.") } @@ -178,162 +201,162 @@ if (is.dir(file)) { result <- search_tree(file.path(path, file), target) if (!is.null(result)) { - return (result) + return(result) } } else if (tolower(file) == target) { - return (file.path(path, file)) + return(file.path(path, file)) } } - return (NULL) + return(NULL) } create_database <- function(orm) { - orm$recreate_database(no_exists=FALSE) + orm$recreate_database(no_exists = FALSE) set_database_version(orm, "created") } insert_adducts <- function(orm) { message("Creating adducts...") adducts <- list( - list("[M-H2O-H]-",1,-1,-48.992020312000001069,1,0,0.5,"H0","H1O3"), - list("[M-H-Cl+O]-",1,-1,-19.981214542000000022,2,0,0.5,"O1","H1Cl1"), - list("[M-Cl+O]-",1,-1,-18.973389510000000512,3,0,0.5,"O1","Cl1"), - list("[M-3H]3-",1,-3,-3.0218293560000000219,4,0,1.0,"H0","H3"), - list("[2M-3H]3-",2,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"), - list("[3M-3H]3-",3,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"), - list("[M-2H]2-",1,-2,-2.0145529039999998666,5,0,1.0,"H0","H2"), - list("[2M-2H]2-",2,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"), - list("[3M-2H]2-",3,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"), - list("[M-H]-",1,-1,-1.0072764519999999333,6,1,1.0,"H0","H1"), - list("[2M-H]-",2,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"), - list("[3M-H]-",3,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"), - list("[M]+",1,1,-0.00054858000000000000945,7,1,1.0,"H0","H0"), - list("[M]-",1,-1,0.00054858000000000000945,8,1,1.0,"H0","H0"), - list("[M+H]+",1,1,1.0072764519999999333,9,1,1.0,"H1","H0"), - list("[2M+H]+",2,1,1.0072764519999999333,9,0,0.5,"H1","H0"), - list("[3M+H]+",3,1,1.0072764519999999333,9,0,0.25,"H1","H0"), - list("[M+2H]2+",1,2,2.0145529039999998666,10,0,0.75,"H2","H0"), - list("[2M+2H]2+",2,2,2.0145529039999998666,10,0,0.5,"H2","H0"), - list("[3M+2H]2+",3,2,2.0145529039999998666,10,0,0.25,"H2","H0"), - list("[M+3H]3+",1,3,3.0218293560000000219,11,0,0.75,"H3","H0"), - list("[2M+3H]3+",2,3,3.0218293560000000219,11,0,0.5,"H3","H0"), - list("[3M+3H]3+",3,3,3.0218293560000000219,11,0,0.25,"H3","H0"), - list("[M-2H+NH4]-",1,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"), - list("[2M-2H+NH4]-",2,-1,16.019272654000001665,12,0,0.0,"N1H4","H2"), - list("[3M-2H+NH4]-",3,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"), - list("[M+NH4]+",1,1,18.033825558000000199,13,1,1.0,"N1H4","H0"), - list("[2M+NH4]+",2,1,18.033825558000000199,13,0,0.5,"N1H4","H0"), - list("[3M+NH4]+",3,1,18.033825558000000199,13,0,0.25,"N1H4","H0"), - list("[M+H+NH4]2+",1,2,19.041102009999999467,14,0,0.5,"N1H5","H0"), - list("[2M+H+NH4]2+",2,2,19.041102009999999467,14,0,0.5,"N1H5","H0"), - list("[3M+H+NH4]2+",3,2,19.041102009999999467,14,0,0.25,"N1H5","H0"), - list("[M+Na-2H]-",1,-1,20.974668176000001551,15,0,0.75,"Na1","H2"), - list("[2M-2H+Na]-",2,-1,20.974668176000001551,15,0,0.25,"Na1","H2"), - list("[3M-2H+Na]-",3,-1,20.974668176000001551,15,0,0.25,"Na1","H2"), - list("[M+Na]+",1,1,22.989221080000000086,16,1,1.0,"Na1","H0"), - list("[2M+Na]+",2,1,22.989221080000000086,16,0,0.5,"Na1","H0"), - list("[3M+Na]+",3,1,22.989221080000000086,16,0,0.25,"Na1","H0"), - list("[M+H+Na]2+",1,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"), - list("[2M+H+Na]2+",2,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"), - list("[3M+H+Na]2+",3,2,23.996497531999999353,17,0,0.25,"Na1H1","H0"), - list("[M+2H+Na]3+",1,3,25.003773983999998619,18,0,0.25,"H2Na1","H0"), - list("[M+CH3OH+H]+",1,1,33.033491200000000276,19,0,0.25,"C1O1H5","H0"), - list("[M-H+Cl]2-",1,-2,33.962124838000001148,20,0,1.0,"Cl1","H1"), - list("[2M-H+Cl]2-",2,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"), - list("[3M-H+Cl]2-",3,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"), - list("[M+Cl]-",1,-1,34.969401290000000416,21,1,1.0,"Cl1","H0"), - list("[2M+Cl]-",2,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"), - list("[3M+Cl]-",3,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"), - list("[M+K-2H]-",1,-1,36.948605415999999479,22,0,0.5,"K1","H2"), - list("[2M-2H+K]-",2,-1,36.948605415999999479,22,0,0.0,"K1","H2"), - list("[3M-2H+K]-",3,-1,36.948605415999999479,22,0,0.0,"K1","H2"), - list("[M+K]+",1,1,38.963158319999998013,23,1,1.0,"K1","H0"), - list("[2M+K]+",2,1,38.963158319999998013,23,0,0.5,"K1","H0"), - list("[3M+K]+",3,1,38.963158319999998013,23,0,0.25,"K1","H0"), - list("[M+H+K]2+",1,2,39.970434771999997281,24,0,0.5,"K1H1","H0"), - list("[2M+H+K]2+",2,2,39.970434771999997281,24,0,0.5,"K1H1","H0"), - list("[3M+H+K]2+",3,2,39.970434771999997281,24,0,0.25,"K1H1","H0"), - list("[M+ACN+H]+",1,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"), - list("[2M+ACN+H]+",2,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"), - list("[M+2Na-H]+",1,1,44.971165708000000902,26,0,0.5,"Na2","H1"), - list("[2M+2Na-H]+",2,1,44.971165708000000902,26,0,0.25,"Na2","H1"), - list("[3M+2Na-H]+",3,1,44.971165708000000902,26,0,0.25,"Na2","H1"), - list("[2M+FA-H]-",2,-1,44.998202851999998586,27,0,0.25,"C1O2H2","H1"), - list("[M+FA-H]-",1,-1,44.998202851999998586,27,0,0.5,"C1O2H2","H1"), - list("[M+2Na]2+",1,2,45.978442160000000172,28,0,0.5,"Na2","H0"), - list("[2M+2Na]2+",2,2,45.978442160000000172,28,0,0.5,"Na2","H0"), - list("[3M+2Na]2+",3,2,45.978442160000000172,28,0,0.25,"Na2","H0"), - list("[M+H+2Na]3+",1,3,46.985718611999999438,29,0,0.25,"H1Na2","H0"), - list("[M+H+FA]+",1,1,47.012755755999997122,30,0,0.25,"C1O2H3","H0"), - list("[M+Hac-H]-",1,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"), - list("[2M+Hac-H]-",2,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"), - list("[M+IsoProp+H]+",1,1,61.064791327999998317,32,0,0.25,"C3H9O1","H0"), - list("[M+Na+K]2+",1,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"), - list("[2M+Na+K]2+",2,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"), - list("[3M+Na+K]2+",3,2,61.9523793999999981,33,0,0.25,"Na1K1","H0"), - list("[M+NO3]-",1,-1,61.988366450000000895,34,0,0.5,"N1O3","H0"), - list("[M+ACN+Na]+",1,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"), - list("[2M+ACN+Na]+",2,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"), - list("[M+NH4+FA]+",1,1,64.039304861999994502,36,0,0.25,"N1C1O2H6","H0"), - list("[M-2H+Na+FA]-",1,-1,66.980147479999999405,37,0,0.5,"NaC1O2H2","H2"), - list("[M+3Na]3+",1,3,68.967663239999993153,38,0,0.25,"Na3","H0"), - list("[M+Na+FA]+",1,1,68.99470038399999794,39,0,0.25,"Na1C1O2H2","H0"), - list("[M+2Cl]2-",1,-2,69.938802580000000832,40,0,1.0,"Cl2","H0"), - list("[2M+2Cl]2-",2,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"), - list("[3M+2Cl]2-",3,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"), - list("[M+2K-H]+",1,1,76.919040187999996758,41,0,0.5,"K2","H1"), - list("[2M+2K-H]+",2,1,76.919040187999996758,41,0,0.25,"K2","H1"), - list("[3M+2K-H]+",3,1,76.919040187999996758,41,0,0.25,"K2","H1"), - list("[M+2K]2+",1,2,77.926316639999996028,42,0,0.5,"K2","H0"), - list("[2M+2K]2+",2,2,77.926316639999996028,42,0,0.5,"K2","H0"), - list("[3M+2K]2+",3,2,77.926316639999996028,42,0,0.25,"K2","H0"), - list("[M+Br]-",1,-1,78.918886479999997619,43,1,1.0,"Br1","H0"), - list("[M+Cl+FA]-",1,-1,80.974880593999998268,44,0,0.5,"Cl1C1O2H2","H0"), - list("[M+AcNa-H]-",1,-1,80.995797543999998426,45,0,0.25,"C2H3Na1O2","H1"), - list("[M+2ACN+2H]2+",1,2,84.067651115999993292,46,0,0.25,"C4H8N2","H0"), - list("[M+K+FA]+",1,1,84.968637623999995868,47,0,0.25,"K1C1O2H2","H0"), - list("[M+Cl+Na+FA-H]-",1,-1,102.95682522200000619,48,0,0.5,"Cl1Na1C1O2H2","H1"), - list("[2M+3H2O+2H]+",2,1,104.03153939599999944,49,0,0.25,"H8O6","H0"), - list("[M+TFA-H]-",1,-1,112.98558742000000165,50,0,0.5,"C2F3O2H1","H1"), - list("[M+H+TFA]+",1,1,115.00014032400000019,51,0,0.25,"C2F3O2H2","H0"), - list("[M+3ACN+2H]2+",1,2,125.09420022199999778,52,0,0.25,"C6H11N3","H0"), - list("[M+NH4+TFA]+",1,1,132.02668943000000468,53,0,0.25,"N1C2F3O2H5","H0"), - list("[M+Na+TFA]+",1,1,136.98208495200000811,54,0,0.25,"Na1C2F3O2H1","H0"), - list("[M+Cl+TFA]-",1,-1,148.96226516199999423,55,0,0.5,"Cl1C2F3O2H1","H0"), - list("[M+K+TFA]+",1,1,152.95602219200000604,56,0,0.25,"K1C2F3O2H1","H0") + list("[M-H2O-H]-", 1, -1, -48.992020312000001069, 1, 0, 0.5, "H0", "H1O3"), + list("[M-H-Cl+O]-", 1, -1, -19.981214542000000022, 2, 0, 0.5, "O1", "H1Cl1"), + list("[M-Cl+O]-", 1, -1, -18.973389510000000512, 3, 0, 0.5, "O1", "Cl1"), + list("[M-3H]3-", 1, -3, -3.0218293560000000219, 4, 0, 1.0, "H0", "H3"), + list("[2M-3H]3-", 2, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"), + list("[3M-3H]3-", 3, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"), + list("[M-2H]2-", 1, -2, -2.0145529039999998666, 5, 0, 1.0, "H0", "H2"), + list("[2M-2H]2-", 2, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"), + list("[3M-2H]2-", 3, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"), + list("[M-H]-", 1, -1, -1.0072764519999999333, 6, 1, 1.0, "H0", "H1"), + list("[2M-H]-", 2, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"), + list("[3M-H]-", 3, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"), + list("[M]+", 1, 1, -0.00054858000000000000945, 7, 1, 1.0, "H0", "H0"), + list("[M]-", 1, -1, 0.00054858000000000000945, 8, 1, 1.0, "H0", "H0"), + list("[M+H]+", 1, 1, 1.0072764519999999333, 9, 1, 1.0, "H1", "H0"), + list("[2M+H]+", 2, 1, 1.0072764519999999333, 9, 0, 0.5, "H1", "H0"), + list("[3M+H]+", 3, 1, 1.0072764519999999333, 9, 0, 0.25, "H1", "H0"), + list("[M+2H]2+", 1, 2, 2.0145529039999998666, 10, 0, 0.75, "H2", "H0"), + list("[2M+2H]2+", 2, 2, 2.0145529039999998666, 10, 0, 0.5, "H2", "H0"), + list("[3M+2H]2+", 3, 2, 2.0145529039999998666, 10, 0, 0.25, "H2", "H0"), + list("[M+3H]3+", 1, 3, 3.0218293560000000219, 11, 0, 0.75, "H3", "H0"), + list("[2M+3H]3+", 2, 3, 3.0218293560000000219, 11, 0, 0.5, "H3", "H0"), + list("[3M+3H]3+", 3, 3, 3.0218293560000000219, 11, 0, 0.25, "H3", "H0"), + list("[M-2H+NH4]-", 1, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"), + list("[2M-2H+NH4]-", 2, -1, 16.019272654000001665, 12, 0, 0.0, "N1H4", "H2"), + list("[3M-2H+NH4]-", 3, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"), + list("[M+NH4]+", 1, 1, 18.033825558000000199, 13, 1, 1.0, "N1H4", "H0"), + list("[2M+NH4]+", 2, 1, 18.033825558000000199, 13, 0, 0.5, "N1H4", "H0"), + list("[3M+NH4]+", 3, 1, 18.033825558000000199, 13, 0, 0.25, "N1H4", "H0"), + list("[M+H+NH4]2+", 1, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"), + list("[2M+H+NH4]2+", 2, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"), + list("[3M+H+NH4]2+", 3, 2, 19.041102009999999467, 14, 0, 0.25, "N1H5", "H0"), + list("[M+Na-2H]-", 1, -1, 20.974668176000001551, 15, 0, 0.75, "Na1", "H2"), + list("[2M-2H+Na]-", 2, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"), + list("[3M-2H+Na]-", 3, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"), + list("[M+Na]+", 1, 1, 22.989221080000000086, 16, 1, 1.0, "Na1", "H0"), + list("[2M+Na]+", 2, 1, 22.989221080000000086, 16, 0, 0.5, "Na1", "H0"), + list("[3M+Na]+", 3, 1, 22.989221080000000086, 16, 0, 0.25, "Na1", "H0"), + list("[M+H+Na]2+", 1, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"), + list("[2M+H+Na]2+", 2, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"), + list("[3M+H+Na]2+", 3, 2, 23.996497531999999353, 17, 0, 0.25, "Na1H1", "H0"), + list("[M+2H+Na]3+", 1, 3, 25.003773983999998619, 18, 0, 0.25, "H2Na1", "H0"), + list("[M+CH3OH+H]+", 1, 1, 33.033491200000000276, 19, 0, 0.25, "C1O1H5", "H0"), + list("[M-H+Cl]2-", 1, -2, 33.962124838000001148, 20, 0, 1.0, "Cl1", "H1"), + list("[2M-H+Cl]2-", 2, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"), + list("[3M-H+Cl]2-", 3, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"), + list("[M+Cl]-", 1, -1, 34.969401290000000416, 21, 1, 1.0, "Cl1", "H0"), + list("[2M+Cl]-", 2, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"), + list("[3M+Cl]-", 3, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"), + list("[M+K-2H]-", 1, -1, 36.948605415999999479, 22, 0, 0.5, "K1", "H2"), + list("[2M-2H+K]-", 2, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"), + list("[3M-2H+K]-", 3, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"), + list("[M+K]+", 1, 1, 38.963158319999998013, 23, 1, 1.0, "K1", "H0"), + list("[2M+K]+", 2, 1, 38.963158319999998013, 23, 0, 0.5, "K1", "H0"), + list("[3M+K]+", 3, 1, 38.963158319999998013, 23, 0, 0.25, "K1", "H0"), + list("[M+H+K]2+", 1, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"), + list("[2M+H+K]2+", 2, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"), + list("[3M+H+K]2+", 3, 2, 39.970434771999997281, 24, 0, 0.25, "K1H1", "H0"), + list("[M+ACN+H]+", 1, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"), + list("[2M+ACN+H]+", 2, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"), + list("[M+2Na-H]+", 1, 1, 44.971165708000000902, 26, 0, 0.5, "Na2", "H1"), + list("[2M+2Na-H]+", 2, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"), + list("[3M+2Na-H]+", 3, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"), + list("[2M+FA-H]-", 2, -1, 44.998202851999998586, 27, 0, 0.25, "C1O2H2", "H1"), + list("[M+FA-H]-", 1, -1, 44.998202851999998586, 27, 0, 0.5, "C1O2H2", "H1"), + list("[M+2Na]2+", 1, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"), + list("[2M+2Na]2+", 2, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"), + list("[3M+2Na]2+", 3, 2, 45.978442160000000172, 28, 0, 0.25, "Na2", "H0"), + list("[M+H+2Na]3+", 1, 3, 46.985718611999999438, 29, 0, 0.25, "H1Na2", "H0"), + list("[M+H+FA]+", 1, 1, 47.012755755999997122, 30, 0, 0.25, "C1O2H3", "H0"), + list("[M+Hac-H]-", 1, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"), + list("[2M+Hac-H]-", 2, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"), + list("[M+IsoProp+H]+", 1, 1, 61.064791327999998317, 32, 0, 0.25, "C3H9O1", "H0"), + list("[M+Na+K]2+", 1, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"), + list("[2M+Na+K]2+", 2, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"), + list("[3M+Na+K]2+", 3, 2, 61.9523793999999981, 33, 0, 0.25, "Na1K1", "H0"), + list("[M+NO3]-", 1, -1, 61.988366450000000895, 34, 0, 0.5, "N1O3", "H0"), + list("[M+ACN+Na]+", 1, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"), + list("[2M+ACN+Na]+", 2, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"), + list("[M+NH4+FA]+", 1, 1, 64.039304861999994502, 36, 0, 0.25, "N1C1O2H6", "H0"), + list("[M-2H+Na+FA]-", 1, -1, 66.980147479999999405, 37, 0, 0.5, "NaC1O2H2", "H2"), + list("[M+3Na]3+", 1, 3, 68.967663239999993153, 38, 0, 0.25, "Na3", "H0"), + list("[M+Na+FA]+", 1, 1, 68.99470038399999794, 39, 0, 0.25, "Na1C1O2H2", "H0"), + list("[M+2Cl]2-", 1, -2, 69.938802580000000832, 40, 0, 1.0, "Cl2", "H0"), + list("[2M+2Cl]2-", 2, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"), + list("[3M+2Cl]2-", 3, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"), + list("[M+2K-H]+", 1, 1, 76.919040187999996758, 41, 0, 0.5, "K2", "H1"), + list("[2M+2K-H]+", 2, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"), + list("[3M+2K-H]+", 3, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"), + list("[M+2K]2+", 1, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"), + list("[2M+2K]2+", 2, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"), + list("[3M+2K]2+", 3, 2, 77.926316639999996028, 42, 0, 0.25, "K2", "H0"), + list("[M+Br]-", 1, -1, 78.918886479999997619, 43, 1, 1.0, "Br1", "H0"), + list("[M+Cl+FA]-", 1, -1, 80.974880593999998268, 44, 0, 0.5, "Cl1C1O2H2", "H0"), + list("[M+AcNa-H]-", 1, -1, 80.995797543999998426, 45, 0, 0.25, "C2H3Na1O2", "H1"), + list("[M+2ACN+2H]2+", 1, 2, 84.067651115999993292, 46, 0, 0.25, "C4H8N2", "H0"), + list("[M+K+FA]+", 1, 1, 84.968637623999995868, 47, 0, 0.25, "K1C1O2H2", "H0"), + list("[M+Cl+Na+FA-H]-", 1, -1, 102.95682522200000619, 48, 0, 0.5, "Cl1Na1C1O2H2", "H1"), + list("[2M+3H2O+2H]+", 2, 1, 104.03153939599999944, 49, 0, 0.25, "H8O6", "H0"), + list("[M+TFA-H]-", 1, -1, 112.98558742000000165, 50, 0, 0.5, "C2F3O2H1", "H1"), + list("[M+H+TFA]+", 1, 1, 115.00014032400000019, 51, 0, 0.25, "C2F3O2H2", "H0"), + list("[M+3ACN+2H]2+", 1, 2, 125.09420022199999778, 52, 0, 0.25, "C6H11N3", "H0"), + list("[M+NH4+TFA]+", 1, 1, 132.02668943000000468, 53, 0, 0.25, "N1C2F3O2H5", "H0"), + list("[M+Na+TFA]+", 1, 1, 136.98208495200000811, 54, 0, 0.25, "Na1C2F3O2H1", "H0"), + list("[M+Cl+TFA]-", 1, -1, 148.96226516199999423, 55, 0, 0.5, "Cl1C2F3O2H1", "H0"), + list("[M+K+TFA]+", 1, 1, 152.95602219200000604, 56, 0, 0.25, "K1C2F3O2H1","H0") ) dummy_adduct <- orm$adduct() for (adduct in adducts) { i <- 0 - dummy_adduct$set_name(adduct[[i <- i+1]]) - dummy_adduct$set_multi(adduct[[i <- i+1]]) - dummy_adduct$set_charge(adduct[[i <- i+1]]) - dummy_adduct$set_mass(adduct[[i <- i+1]]) - dummy_adduct$set_oidscore(adduct[[i <- i+1]]) - dummy_adduct$set_quasi(adduct[[i <- i+1]]) - dummy_adduct$set_ips(adduct[[i <- i+1]]) - dummy_adduct$set_formula_add(adduct[[i <- i+1]]) - dummy_adduct$set_formula_ded(adduct[[i <- i+1]]) + dummy_adduct$set_name(adduct[[i <- i + 1]]) + dummy_adduct$set_multi(adduct[[i <- i + 1]]) + dummy_adduct$set_charge(adduct[[i <- i + 1]]) + dummy_adduct$set_mass(adduct[[i <- i + 1]]) + dummy_adduct$set_oidscore(adduct[[i <- i + 1]]) + dummy_adduct$set_quasi(adduct[[i <- i + 1]]) + dummy_adduct$set_ips(adduct[[i <- i + 1]]) + dummy_adduct$set_formula_add(adduct[[i <- i + 1]]) + dummy_adduct$set_formula_ded(adduct[[i <- i + 1]]) invisible(dummy_adduct$save()) - dummy_adduct$clear(unset_id=TRUE) + dummy_adduct$clear(unset_id = TRUE) } message("Adducts created") } -insert_base_data <- function(orm, path, archetype=FALSE) { +insert_base_data <- function(orm, path, archetype = FALSE) { if (archetype) { ## not implemented yet - return () + return() } base_data <- readLines(path) - for (sql in strsplit(paste(base_data, collapse=" "), ";")[[1]]) { + for (sql in strsplit(paste(base_data, collapse = " "), ";")[[1]]) { orm$execute(sql) } set_database_version(orm, "enriched") } insert_compounds <- function(orm, compounds_path) { - compounds <- read.csv(file=compounds_path, sep="\t") + compounds <- read.csv(file = compounds_path, sep = "\t") if (is.null(compounds <- translate_compounds(compounds))) { stop("Could not find asked compound's attributes in csv file.") } @@ -344,18 +367,21 @@ dummy_compound$set_name(compounds[i, "name"]) dummy_compound$set_common_name(compounds[i, "common_name"]) dummy_compound$set_formula(compounds[i, "formula"]) - compound_list[[length(compound_list)+1]] <- as.list( + compound_list[[length(compound_list) + 1]] <- as.list( dummy_compound, c("mz", "name", "common_name", "formula") ) - dummy_compound$clear(unset_id=TRUE) + dummy_compound$clear(unset_id = TRUE) } - invisible(dummy_compound$save(bulk=compound_list)) + invisible(dummy_compound$save(bulk = compound_list)) } translate_compounds <- function(compounds) { recognized_headers <- list( - c("HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", "MetName", "ChemFormula", "INChIkey") + c( + "HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", + "MetName", "ChemFormula", "INChIkey" + ) ) header_translators <- list( hmdb_header_translator @@ -363,23 +389,23 @@ for (index in seq_along(recognized_headers)) { headers <- recognized_headers[[index]] if (identical(colnames(compounds), headers)) { - return (header_translators[[index]](compounds)) + return(header_translators[[index]](compounds)) } } if (is.null(translator <- guess_translator(colnames(compounds)))) { - return (NULL) + return(NULL) } - return (csv_header_translator(translator, compounds)) + return(csv_header_translator(translator, compounds)) } guess_translator <- function(header) { result <- list( - # HMDB_ID=NULL, - mz=NULL, - name=NULL, - common_name=NULL, - formula=NULL, - # inchi_key=NULL + # HMDB_ID = NULL, + mz = NULL, + name = NULL, + common_name = NULL, + formula = NULL, + # inchi_key = NULL ) asked_cols <- names(result) for (asked_col in asked_cols) { @@ -395,39 +421,39 @@ } } if (any(mapply(is.null, result))) { - return (NULL) + return(NULL) } - return (result) + return(result) } hmdb_header_translator <- function(compounds) { - return (csv_header_translator( + return(csv_header_translator( list( - HMDB_ID="HMDB_ID", - mz="MzBank", - name="MetName", - common_name="MetName", - formula="ChemFormula", - inchi_key="INChIkey" + HMDB_ID = "HMDB_ID", + mz = "MzBank", + name = "MetName", + common_name = "MetName", + formula = "ChemFormula", + inchi_key = "INChIkey" ), compounds )) } csv_header_translator <- function(translation_table, csv) { header_names <- names(translation_table) - result <- data.frame(1:nrow(csv)) + result <- data.frame(seq_len(nrow(csv))) for (i in seq_along(header_names)) { result[, header_names[[i]]] <- csv[, translation_table[[i]]] } result[, "mz"] <- as.numeric(result[, "mz"]) - return (result) + return(result) } set_database_version <- function(orm, version) { orm$set_tag( version, - tag_name="database_version", - tag_table_name="XSeeker_tagging_table" + tag_name = "database_version", + tag_table_name = "XSeeker_tagging_table" ) } @@ -444,15 +470,16 @@ error <- tryCatch({ process_sample_list( orm, rdata, samples, - show_percent=show_percent + show_percent = show_percent, + file_grouping_var = options$class ) NULL - }, error=function(e) { + }, error = function(e) { message(e) e }) if (!is.null(mzml_tmp_dir)) { - unlink(mzml_tmp_dir, recursive=TRUE) + unlink(mzml_tmp_dir, recursive = TRUE) } if (!is.null(error)) { stop(error) @@ -463,23 +490,49 @@ if (is.null(rdata$singlefile)) { message("Extracting mxml files") tmp <- tempdir() - rdata$singlefile <- utils::unzip(rdata$zipfile, exdir=tmp) - names(rdata$singlefile) <- tools::file_path_sans_ext(basename(rdata$singlefile)) + rdata$singlefile <- utils::unzip(rdata$zipfile, exdir = tmp) + names(rdata$singlefile) <- tools::file_path_sans_ext( + basename(rdata$singlefile) + ) message("Extracted") - return (tmp) + return(tmp) } else { - message(sprintf("Not a zip file, loading files directly from path: %s", paste(rdata$singlefile, collapse=" ; "))) + message(sprintf( + "Not a zip file, loading files directly from path: %s", + paste(rdata$singlefile, collapse = " ; ") + )) } - return (NULL) + return(NULL) } -process_sample_list <- function(orm, radta, sample_names, show_percent) { - file_grouping_var <- find_grouping_var(rdata$variableMetadata) +process_sample_list <- function( + orm, + rdata, + sample_names, + show_percent, + file_grouping_var = NULL +) { + if (is.null(file_grouping_var)) { + file_grouping_var <- find_grouping_var(rdata$variableMetadata) + if (is.null(file_grouping_var)) { + stop("Malformed variableMetada.") + } + } + tryCatch({ + headers <- colnames(rdata$variableMetadata) + file_grouping_var <- headers[[as.numeric(file_grouping_var)]] + }, error = function(e) NULL) + if ( + is.null(file_grouping_var) + || !(file_grouping_var %in% colnames(rdata$variableMetadata)) + ) { + stop(sprintf( + "Could not find grouping variable %s in var meta file.", + file_grouping_var + )) + } message("Processing samples.") message(sprintf("File grouping variable: %s", file_grouping_var)) - if(is.null(file_grouping_var)) { - stop("Malformed variableMetada.") - } context <- new.env() context$samples <- list() @@ -492,7 +545,6 @@ process_params <- list() if (is.null(process_arg_list)) { - histories <- list() for (history in xcms_set@.processHistory) { if ( class(history@param) == "CentWaveParam" @@ -500,19 +552,23 @@ ) { params <- history@param process_params <- list(list( - xfunction="annotatediff", - ppm=params@ppm, - peakwidth=sprintf("%s - %s", params@peakwidth[[1]], params@peakwidth[[2]]), - snthresh=params@snthresh, - prefilterStep=params@prefilter[[1]], - prefilterLevel=params@prefilter[[2]], - mzdiff=params@mzdiff, - fitgauss=params@fitgauss, - noise=params@noise, - mzCenterFun=params@mzCenterFun, - integrate=params@integrate, - firstBaselineCheck=params@firstBaselineCheck, - snthreshIsoROIs=!identical(params@roiScales, numeric(0)) + xfunction = "annotatediff", + ppm = params@ppm, + peakwidth = sprintf( + "%s - %s", + params@peakwidth[[1]], + params@peakwidth[[2]] + ), + snthresh = params@snthresh, + prefilterStep = params@prefilter[[1]], + prefilterLevel = params@prefilter[[2]], + mzdiff = params@mzdiff, + fitgauss = params@fitgauss, + noise = params@noise, + mzCenterFun = params@mzCenterFun, + integrate = params@integrate, + firstBaselineCheck = params@firstBaselineCheck, + snthreshIsoROIs = !identical(params@roiScales, numeric(0)) )) break } @@ -521,9 +577,11 @@ for (list_name in names(process_arg_list)) { param_list <- list() for (param_name in names(process_arg_list[[list_name]])) { - param_list[[param_name]] <- process_arg_list[[list_name]][[param_name]] + param_list[[param_name]] <- process_arg_list[[ + list_name + ]][[param_name]] } - process_params[[length(process_params)+1]] <- param_list + process_params[[length(process_params) + 1]] <- param_list } } @@ -531,6 +589,17 @@ indices <- as.numeric(unique(var_meta[, file_grouping_var])) + if (any(is.null(names(singlefile)[indices]))) { + stop(sprintf( + paste( + "Indices defined by grouping variable %s are not all present", + "in singlefile names (%s).\nCannot continue. Indices: %s" + ), + file_grouping_var, + paste(names(singlefile), collapse = ", "), + paste(indices, collapse = ", ") + )) + } smol_xcms_set <- orm$smol_xcms_set() mz_tab_info <- new.env() g <- xcms::groups(xcms_set) @@ -538,10 +607,16 @@ mz_tab_info$dataset_path <- xcms::filepaths(xcms_set) mz_tab_info$sampnames <- xcms::sampnames(xcms_set) mz_tab_info$sampclass <- xcms::sampclass(xcms_set) - mz_tab_info$rtmed <- g[,"rtmed"] - mz_tab_info$mzmed <- g[,"mzmed"] - mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(xcms_set, value="into") - blogified <- blob::blob(fst::compress_fst(serialize(mz_tab_info, NULL), compression=100)) + mz_tab_info$rtmed <- g[, "rtmed"] + mz_tab_info$mzmed <- g[, "mzmed"] + mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval( + xcms_set, + value = "into" + ) + blogified <- blob::blob(fst::compress_fst( + serialize(mz_tab_info, NULL), + compression = 100 + )) rm(mz_tab_info) invisible(smol_xcms_set$set_raw(blogified)$save()) @@ -582,9 +657,9 @@ env$enriched_rdata_doc <- ENRICHED_RDATA_DOC sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id) - rm (env) + rm(env) context$samples[no] <- sample$get_id() - rm (sample) + rm(sample) } context$clusters <- list() context$show_percent <- show_percent @@ -597,28 +672,32 @@ message("Features enrichment") complete_features(orm, clusters, show_percent) message("Features enrichment done.") - return (NULL) + return(NULL) } find_grouping_var <- function(var_meta) { - known_colnames = c( + known_colnames <- c( "name", "namecustom", "mz", "mzmin", "mzmax", - "rt", "rtmin", "rtmax", "npeaks", "isotopes", "adduct", "pcgroup" + "rt", "rtmin", "rtmax", "npeaks", "isotopes", "adduct", + "pcgroup", "ms_level" ) col_names <- colnames(var_meta) - classes = list() + classes <- list() for (name in col_names) { if (!(name %in% known_colnames)) { - classes[[length(classes)+1]] = name + classes[[length(classes) + 1]] <- name } } if (length(classes) > 1) { - stop(sprintf("Only one class expected in the variable metadata. Found %d .", length(classes))) + stop(sprintf( + "Only one class expected in the variable metadata. Found %d .", + length(classes) + )) } if (length(classes) == 0) { stop("Could not find any class column in your variableMetadata.") } - return (classes[[1]]) + return(classes[[1]]) } add_sample_to_database <- function(orm, env, context, smol_xcms_set_id) { @@ -629,12 +708,15 @@ $set_path(env$dataset_path) $set_kind("enriched_rdata") $set_polarity( - if (is.null(env$polarity) || identical(env$polarity, character(0))) "" + if ( + is.null(env$polarity) + || identical(env$polarity, character(0)) + ) "" else env$polarity ) $set_raw(blob::blob(fst::compress_fst( serialize(env, NULL), - compression=100 + compression = 100 ))) ) sample[["smol_xcms_set_id"]] <- smol_xcms_set_id @@ -642,7 +724,7 @@ sample <- sample$save() load_process_params(orm, sample, env$process_params) message(sprintf("Sample %s inserted.", env$sample_name)) - return (sample) + return(sample) } @@ -660,14 +742,14 @@ next_pc_group, next_align_group )) message("Extracting features done.") - return (NULL) + return(NULL) } get_next_id <- function(models, attribute) { if ((id <- models$max(attribute)) == Inf || id == -Inf) { - return (0) + return(0) } - return (id) + return(id) } create_features <- function( @@ -676,7 +758,7 @@ next_pc_group, next_align_group ) { field_names <- as.list(names(orm$feature()$fields__)) - field_names[field_names=="id"] <- NULL + field_names[field_names == "id"] <- NULL features <- list() dummy_feature <- orm$feature() @@ -688,13 +770,13 @@ rows <- seq_len(nrow(var_meta)) if (PROCESS_SMOL_BATCH) { - rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))] + rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))] } cluster_row <- list() for (row in rows) { if (show_percent && (row / total) * 100 > percent) { percent <- percent + 1 - message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) + message("\r", sprintf("\r%d %%", percent), appendLF = FALSE) } dummy_feature$set_featureID(next_feature_id) @@ -710,27 +792,44 @@ peak_list <- context$peaks[context$groupidx[[row]], ] if (! ("matrix" %in% class(peak_list))) { - peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list))) + peak_list <- matrix( + peak_list, + nrow = 1, + ncol = length(peak_list), + dimnames = list(c(), names(peak_list)) + ) } clusterID <- as.character(clusterID) if (is.null(context$central_feature[[clusterID]])) { int_o <- extract_peak_var(peak_list, "into") context$central_feature[[clusterID]] <- ( - peak_list[peak_list[, "into"] == int_o,]["sample"] + peak_list[peak_list[, "into"] == int_o, ]["sample"] ) } if (!DEBUG_FAST_IGNORE_SLOW_OP) { - sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE] - if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { - if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { + central_feature <- context$central_feature[[clusterID]] + sample_peak_list <- peak_list[ + as.integer(peak_list[, "sample"]) == central_feature, + , + drop = FALSE + ] + if ( + !identical(sample_peak_list, numeric(0)) + && !is.null(nrow(sample_peak_list)) + && nrow(sample_peak_list) != 0 + ) { + int_o <- extract_peak_var(sample_peak_list, "into") + if (!is.na(int_o)) { dummy_feature$set_int_o(int_o) } - if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { + int_b <- extract_peak_var(sample_peak_list, "intb") + if (!is.na(int_b)) { dummy_feature$set_int_b(int_b) } - if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) { + max_o <- extract_peak_var(sample_peak_list, "maxo") + if (!is.na(max_o)) { dummy_feature$set_max_o(max_o) } } @@ -744,13 +843,13 @@ next_align_group ) next_align_group <- next_align_group + 1 - features[[length(features)+1]] <- as.list(dummy_feature, field_names) + features[[length(features) + 1]] <- as.list(dummy_feature, field_names) dummy_feature$clear() } rm(var_meta) message("") message("Saving features") - invisible(dummy_feature$save(bulk=features)) + invisible(dummy_feature$save(bulk = features)) ## We link manually clusters to the sample they're in. link_cache <- list() @@ -758,24 +857,25 @@ sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"]) for (sample_id in context$samples[sample_nos]) { cluster_id <- cluster_row[[row]]$get_id() - if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) { + id <- paste(sample_id, cluster_id, sep = ";") + if (is.null(link_cache[[id]])) { link_cache[[id]] <- 1 orm$cluster_sample( - sample_id=sample_id, - cluster_id=cluster_id + sample_id = sample_id, + cluster_id = cluster_id )$save() } } } message("Saved.") - return (context$clusters) + return(context$clusters) } -extract_peak_var <- function(peak_list, var_name, selector=max) { +extract_peak_var <- function(peak_list, var_name, selector = max) { value <- peak_list[, var_name] names(value) <- NULL - return (selector(value)) + return(selector(value)) } set_feature_fields_from_var_meta <- function(feature, var_meta) { @@ -800,24 +900,24 @@ if (!is.null(isotopes <- var_meta[["isotopes"]]) && !is.na(isotopes)) { feature$set_iso(isotopes) } - return (feature) + return(feature) } extract_iso <- function(weird_data) { if (grepl("^\\[\\d+\\]", weird_data)[[1]]) { - return (sub("^\\[\\d+\\]", "", weird_data, perl=TRUE)) + return(sub("^\\[\\d+\\]", "", weird_data, perl = TRUE)) } - return (weird_data) + return(weird_data) } -extract_clusterID <- function(weird_data, next_cluster_id){ +extract_clusterID <- function(weird_data, next_cluster_id) { if (grepl("^\\[\\d+\\]", weird_data)[[1]]) { clusterID <- stringr::str_extract(weird_data, "^\\[\\d+\\]") clusterID <- as.numeric(stringr::str_extract(clusterID, "\\d+")) } else { clusterID <- 0 } - return (clusterID + next_cluster_id) + return(clusterID + next_cluster_id) } create_associated_cluster <- function( @@ -831,24 +931,29 @@ adduct_name <- as.character(curent_var_meta[["adduct"]]) annotation <- curent_var_meta[["isotopes"]] cluster <- context$clusters[[clusterID]] <- orm$cluster( - pc_group=pcgroup + next_pc_group, + pc_group = pcgroup + next_pc_group, # adduct=adduct, - align_group=next_align_group, + align_group = next_align_group, # curent_group=curent_group, - clusterID=context$clusterID, - annotation=annotation + clusterID = context$clusterID, + annotation = annotation ) if (is.null(adduct <- context$adducts[[adduct_name]])) { - context$adducts[[adduct_name]] <- orm$adduct()$load_by(name=adduct_name)$first() + context$adducts[[adduct_name]] <- orm$adduct()$load_by( + name = adduct_name + )$first() if (is.null(adduct <- context$adducts[[adduct_name]])) { - adduct <- context$adducts[[adduct_name]] <- orm$adduct(name=adduct_name, charge=0) + adduct <- context$adducts[[adduct_name]] <- orm$adduct( + name = adduct_name, + charge = 0 + ) adduct$save() } } cluster$set_adduct(adduct) - ## Crappy hack to assign sample id to cluster without loading the sample. - ## Samples are too big (their sample$env) and slows the process, and eat all the menory - ## so we dont't want to load them. + ## Crappy hack to assign sample id to cluster without loading the + ## sample. Samples are too big (their sample$env) and slows the + ## process, and eat all the menory so we dont't want to load them. cluster[["sample_id"]] <- main_sample_id cluster$modified__[["sample_id"]] <- main_sample_id } else { @@ -858,7 +963,7 @@ } cluster$save() feature$set_cluster(cluster) - return (cluster) + return(cluster) } complete_features <- function(orm, clusters, show_percent) { @@ -866,20 +971,25 @@ percent <- -1 i <- 0 for (cluster in clusters) { - i <- i+1 + i <- i + 1 if (show_percent && (i / total) * 100 > percent) { percent <- percent + 1 - message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) + message("\r", sprintf("\r%d %%", percent), appendLF = FALSE) } - features <- orm$feature()$load_by(cluster_id=cluster$get_id()) + features <- orm$feature()$load_by(cluster_id = cluster$get_id()) if (features$any()) { if (!is.null(rt <- features$mean("rt"))) { cluster$set_mean_rt(rt)$save() } features_df <- as.data.frame(features) - central_feature <- features_df[grepl("^\\[M\\]", features_df[, "iso"]), ] + central_feature <- features_df[ + grepl("^\\[M\\]", features_df[, "iso"]), + ] central_feature_into <- central_feature[["int_o"]] - if (!identical(central_feature_into, numeric(0)) && central_feature_into != 0) { + if ( + !identical(central_feature_into, numeric(0)) + && central_feature_into != 0 + ) { for (feature in as.vector(features)) { feature$set_abundance( feature$get_int_o() / central_feature_into * 100 @@ -888,7 +998,7 @@ } } } - return (NULL) + return(NULL) } load_process_params <- function(orm, sample, params) { @@ -900,19 +1010,23 @@ load_process_params_peak_picking(orm, sample, param_list) } } - return (sample) + return(sample) } -load_process_params_peak_picking <- function(orm, sample, peak_picking_params) { - return (add_sample_process_parameters( - params=peak_picking_params, - params_translation=list( - ppm="ppm", - maxcharge="maxCharge", - maxiso="maxIso" +load_process_params_peak_picking <- function( + orm, + sample, + peak_picking_params +) { + return(add_sample_process_parameters( + params = peak_picking_params, + params_translation = list( + ppm = "ppm", + maxcharge = "maxCharge", + maxiso = "maxIso" ), - param_model_generator=orm$peak_picking_parameters, - sample_param_setter=sample$set_peak_picking_parameters + param_model_generator = orm$peak_picking_parameters, + sample_param_setter = sample$set_peak_picking_parameters )) } @@ -937,7 +1051,7 @@ params_model <- do.call(param_model_generator, model_params) params_model$save() } - return (sample_param_setter(params_model)$save()) + return(sample_param_setter(params_model)$save()) } @@ -946,56 +1060,67 @@ option_list <- list( optparse::make_option( c("-v", "--version"), - action="store_true", - help="Display this tool's version and exits" + action = "store_true", + help = "Display this tool's version and exits" ), optparse::make_option( c("-i", "--input"), - type="character", - help="The rdata path to import in XSeeker" + type = "character", + help = "The rdata path to import in XSeeker" ), optparse::make_option( c("-s", "--samples"), - type="character", - help="Samples to visualise in XSeeker" + type = "character", + help = "Samples to visualise in XSeeker" ), optparse::make_option( c("-B", "--archetype"), - type="character", - help="The name of the base database" + type = "character", + help = "The name of the base database" ), optparse::make_option( c("-b", "--database"), - type="character", - help="The base database's path" + type = "character", + help = "The base database's path" ), optparse::make_option( c("-c", "--compounds-csv"), - type="character", - help="The csv containing compounds" + type = "character", + help = "The csv containing compounds" ), optparse::make_option( c("-m", "--models"), - type="character", - help="The path or url (must begin with http[s]:// or git@) to the database's models" + type = "character", + help = paste( + "The path or url (must begin with http[s]:// or git@) to", + "the database's models" + ) ), optparse::make_option( + c("-k", "--class"), + type = "character", + help = "The name of the column containing the classes" + ), + optparse::make_option( c("-o", "--output"), - type="character", - help="The path where to output sqlite" + type = "character", + help = "The path where to output sqlite" ), optparse::make_option( c("-P", "--not-show-percent"), - action="store_true", - help="Flag not to show the percents", - default=FALSE + action = "store_true", + help = "Flag not to show the percents", + default = FALSE ) ) -options(error=function(){traceback(3)}) +options(error = function(){traceback(3)}) -parser <- OptionParser(usage="%prog [options] file", option_list=option_list) -args <- parse_args(parser, positional_arguments=0) +parser <- OptionParser( + usage = "%prog [options] file", + option_list = option_list +) +args <- parse_args(parser, positional_arguments = 0) err_code <- 0 @@ -1006,8 +1131,8 @@ models <- get_models(args$options$models) orm <- DBModelR::ORM( - connection_params=list(dbname=args$options$output), - dbms="SQLite" + connection_params = list(dbname=args$options$output), + dbms = "SQLite" ) invisible(orm$models(models)) @@ -1023,7 +1148,7 @@ message(sprintf("Base data inserted using %s.", args$options$database)) if (!is.null(args$options$archetype)) { - insert_base_data(orm, args$options$archetype, archetype=TRUE) + insert_base_data(orm, args$options$archetype, archetype = TRUE) } if (!is.null(args$options$`compounds-csv`)) { insert_compounds(orm, args$options$`compounds-csv`) @@ -1038,6 +1163,4 @@ process_rdata(orm, rdata, args$options) -quit(status=err_code) - - +quit(status = err_code)
--- a/galaxy/tools/LC-MSMS/XSeekerPreparator.xml Tue Feb 01 18:09:11 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,353 +0,0 @@ -<tool id="xseeker_preparator" - name="XSeeker Preparator" - version="1.2.4" -> - <description>Prepare RData file from CAMERA to be visualized in XSeeker</description> - - <edam_operations> - <edam_operation>operation_1812</edam_operation> - <edam_operation>operation_0335</edam_operation> - </edam_operations> - - <requirements> - <requirement type="package" >bioconductor-xcms</requirement> - <requirement type="package" version="1.48.0">bioconductor-camera</requirement> - <requirement type="package" >git</requirement> - <requirement type="package" >r-blob</requirement> - <requirement type="package" >r-dbi</requirement> - <requirement type="package" >r-fst</requirement> - <requirement type="package" >r-optparse</requirement> - <requirement type="package" >r-stringr</requirement> - <requirement type="package" >r-purrr</requirement> - <requirement type="package" >r-rsqlite</requirement> - <requirement type="package" >r-dbmodelr</requirement> - <!-- - <requirement type="package" version="3.12.0">bioconductor-xcms</requirement> - <requirement type="package" version="1.48.0">bioconductor-camera</requirement> - <requirement type="package" version="2.29.2">git</requirement> - <requirement type="package" version="1.2.1">r-blob</requirement> - <requirement type="package" version="1.1.0">r-dbi</requirement> - <requirement type="package" version="0.9.4">r-fst</requirement> - <requirement type="package" version="1.6.6">r-optparse</requirement> - <requirement type="package" version="1.4.0">r-stringr</requirement> - <requirement type="package" version="0.3.4">r-purrr</requirement> - <requirement type="package" version="2.2.1">r-rsqlite</requirement> - <requirement type="package" version="0.2.0">r-dbmodelr</requirement> - --> - </requirements> - <stdio> - - <exit_code - range="1" - level="warning" - description="Selected samples have no data associated to them." - /> - - <exit_code - range="2" - level="warning" - description="Some samples have no data associated to them." - /> - - </stdio> - - <version_command> - Rscript '$__tool_directory__/XSeekerPreparator.R' -v - </version_command> - - <command> - <![CDATA[ - Rscript '$__tool_directory__/XSeekerPreparator.R' - - -P - - --input '$input' - --output '$output' - - #if $samples.selected - --samples '${",".join($samples.selected)}' - #end if - - #if $database.archetypes - --archetype '${",".join($database.archetypes)}' - #end if - - #if $database.base.kind == "tabular" - --compounds-csv '${database.base.tabular}' - #else if $database.base.kind == "sql" - --database '${database.base.sql}' - #end if - - #if $database.models.kind == "default" - --models '${base_config}' - #else - --models '${database.models.url}' - #end if - - ]]> - - </command> - - <inputs> - <param - name="input" - type="data" - multiple="false" - label="Rdata to prepare" - optional="false" - format="rdata" - > - </param> - <section name="samples" title="Samples Options" expanded="false"> - <param - name="selected" - type="data" - multiple="true" - label="Samples to visualize" - optional="true" - format="mzml" - > - </param> - </section> - - <section name="database" title="Database Options" expanded="false"> - <param - name="archetypes" - type="select" - multiple="true" - label="Molecule family (for database's compounds enrichment)" - > - <option value="G" selected="true">General</option> - <option value="H">Halogenates</option> - </param> - - <conditional name="base"> - <param name="kind" type="select" label="File containing compound's type"> - <option value="none" selected="true">None (deafult)</option> - <option value="tabular">tabular</option> - <option value="sql">sql</option> - </param> - <when value="tabular"> - <param - name="tabular" - type="data" - multiple="true" - label="Tabular file containing compound to use in XSeeker" - optional="true" - format="tabular" - > - </param> - </when> - <when value="sql"> - <param - name="sql" - type="data" - multiple="true" - label="SQL file containing compound to use in XSeeker" - optional="true" - format="sql" - > - </param> - </when> - </conditional> - - <conditional name="models"> - <param name="kind" type="select" label="How is the database's model defined"> - <option value="default" selected="true">Default (regular XSeeker Database)</option> - <option value="url">Download model file</option> - <option value="git">Get versionned model file</option> - </param> - <when value="url"> - <param name="url" type="text" format="url" label="File URL"/> - </when> - <when value="git"> - <param name="url" type="text" format="url" label="Repo URL"/> - </when> - </conditional> - </section> - </inputs> - - - <outputs> - <data format="sqlite" name="output" /> - <!-- <data format="xseeker.sqlite" name="output" /> --> - </outputs> - - <configfiles> - <configfile name="base_config"> -tryCatch({ - DBModelR::ModelDefinition(table="yui", fields=list(yui="INTEGER")) -}, error=function(e) { - stop("Please, install DBModelR before you source this file.") -}) - -list( - adduct=DBModelR::ModelDefinition( - table="adduct", - fields=list( - name="TEXT", - mass="FLOAT", - charge="INTEGER", - multi="INTEGER", - formula_add="TEXT", - formula_ded="TEXT", - sign="TEXT", - oidscore="INTEGER", - quasi="INTEGER", - ips="FLOAT" - ) - ), - cluster=DBModelR::ModelDefinition( - table="cluster", - fields=list( - clusterID="INTEGER", - formula="TEXT", - annotation="TEXT", - coeff="FLOAT", - r_squared="FLOAT", - charge="INTEGER", - mean_rt="FLOAT", - score="FLOAT", - deviation="FLOAT", - status="TEXT", - curent_group="INTEGER", - pc_group="INTEGER", - align_group="INTEGER", - xcms_group="INTEGER" - ), - one=list("sample", "compound", "adduct") - ), - compound=DBModelR::ModelDefinition( - table="compound", - fields=list( - name="TEXT", - common_name="TEXT", - formula="TEXT", - charge="INTEGER", - date="TEXT", - mz="FLOAT" - ) - ), - feature=DBModelR::ModelDefinition( - table="feature", - fields=list( - featureID="INTEGER", - mz="FLOAT", - mz_min="FLOAT", - mz_max="FLOAT", - rt="FLOAT", - rt_min="FLOAT", - rt_max="FLOAT", - int_o="FLOAT", - int_b="FLOAT", - max_o="FLOAT", - iso="TEXT", - abundance="FLOAT" - ), - one=list("cluster") - ), - instrument=DBModelR::ModelDefinition( - table="instrument", - fields=list( - model="TEXT", - manufacturer="TEXT", - analyzer="TEXT", - detector_type="TEXT", - ion_source="TEXT" - ) - ), - instrument_config=DBModelR::ModelDefinition( - table="instrument_config", - fields=list( - resolution="TEXT", - agc_target="TEXT", - maximum_IT="TEXT", - number_of_scan_range="TEXT", - scan_range="TEXT", - version="TEXT" - ) - ), - project=DBModelR::ModelDefinition( - table="project", - fields=list( - name="TEXT", - comment="TEXT" - ), - one=list("sample") - ), - sample=DBModelR::ModelDefinition( - table="sample", - fields=list( - name="TEXT", - path="TEXT", - polarity="TEXT", - kind="TEXT", ## rdata or mxml or enriched_rdata - raw="BLOB" - ), - one=list( - "peak_picking_parameters", - "pairing_parameters", - "alignmenmt_parameters", - "camera_parameters", - "instrument", - "instrument_config", - "software", - "smol_xcms_set" - ) - ), - smol_xcms_set=DBModelR::ModelDefinition( - table="smol_xcms_set", - fields=list( - raw="BLOB" - ) - ), - software=DBModelR::ModelDefinition( - table="software", - fields=list( - name="TEXT", - version="TEXT" - ) - ), - peak_picking_parameters=DBModelR::ModelDefinition( - table="peak_picking_parameters", - fields=list( - ppm="FLOAT", - peakwidth="TEXT", - snthresh="TEXT", - prefilterStep="TEXT", - prefilterLevel="TEXT", - mzdiff="TEXT", - fitgauss="TEXT", - noise="TEXT", - mzCenterFun="TEXT", - integrate="INTEGER", - firstBaselineCheck="TEXT", - snthreshIsoROIs="TEXT", - maxCharge="INTEGER", - maxIso="INTEGER", - mzIntervalExtension="TEXT" - ) - ), - alignmenmt_parameters=DBModelR::ModelDefinition( - table="alignmenmt_parameters", - fields=list( - binSize="TEXT", - centerSample="TEXT", - response="TEXT", - distFun="TEXT", - gapInit="TEXT", - gapExtend="TEXT", - factorDiag="TEXT", - factorGap="TEXT", - localAlignment="INTEGER", - initPenalty="TEXT", - bw="TEXT", - minFraction="TEXT", - minSamples="TEXT", - maxFeatures="TEXT" - ) - ) -) - </configfile> - </configfiles> -</tool>
--- a/test/test.sh Tue Feb 01 18:09:11 2022 +0000 +++ b/test/test.sh Tue Oct 18 12:57:28 2022 +0000 @@ -5,11 +5,14 @@ ln -s ../data/ ./data -~/R/bin/Rscript $(realpath ../XSeekerPreparator.R) \ +# alias Rscript=~/R-versions/R-4.1.2/bin/Rscript + +Rscript $(realpath ../XSeekerPreparator.R) \ -i $(realpath ../data/test.rdata) \ -m $(realpath ../data/models.R) \ -c $(realpath ../data/SERUM_v2019Jan17.tabular) \ -o $(realpath ../test.sqlite) \ + $@ \ || true
