# HG changeset patch # User lain # Date 1622540835 0 # Node ID 00079fadc2409b5edd5cb85b272759fd45b8af88 # Parent 26f01380145d43fe7e4e4607e6eb5e8bc3b04d4c " master branch Updating" diff -r 26f01380145d -r 00079fadc240 XSeekerPreparator.R --- a/XSeekerPreparator.R Wed Apr 07 13:28:09 2021 +0000 +++ b/XSeekerPreparator.R Tue Jun 01 09:47:15 2021 +0000 @@ -1,11 +1,16 @@ TOOL_NAME <- "XSeekerPreparator" -VERSION <- "1.2.1" +VERSION <- "1.2.2" + +DEBUG_FAST <- FALSE +DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST +PROCESS_SMOL_BATCH <- DEBUG_FAST +FAST_FEATURE_RATIO <- 10 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy" -ENRICHED_RDATA_VERSION <- paste("1.1.2", OUTPUT_SPECIFIC_TOOL, sep="-") +ENRICHED_RDATA_VERSION <- paste("1.2.2", OUTPUT_SPECIFIC_TOOL, sep="-") ENRICHED_RDATA_DOC <- sprintf(" Welcome to the enriched of the output of CAMERA/xcms. This doc was generated by the tool: %s - Version %s @@ -554,17 +559,20 @@ next } env <- new.env() - ms_file <- xcms::xcmsRaw(sample_path) - env$tic <- ms_file@tic - env$mz <- ms_file@env$mz - env$scanindex <- ms_file@scanindex - env$scantime <- ms_file@scantime * 60 - env$intensity <- ms_file@env$intensity - env$polarity <- as.character(ms_file@polarity[[1]]) - ## Again, ms file is huge, so we get rid of it quickly. - rm(ms_file) + if (!DEBUG_FAST_IGNORE_SLOW_OP) { + ms_file <- xcms::xcmsRaw(sample_path) + env$tic <- ms_file@tic + env$mz <- ms_file@env$mz + env$scanindex <- ms_file@scanindex + env$scantime <- ms_file@scantime * 60 + env$intensity <- ms_file@env$intensity + env$polarity <- as.character(ms_file@polarity[[1]]) + ## Again, ms file is huge, so we get rid of it quickly. + rm(ms_file) + + } env$sample_name <- sample_name env$dataset_path <- sample_path env$process_params <- process_params @@ -572,6 +580,7 @@ env$enriched_rdata_version <- ENRICHED_RDATA_VERSION env$tool_name <- TOOL_NAME env$enriched_rdata_doc <- ENRICHED_RDATA_DOC + sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id) rm (env) context$samples[no] <- sample$get_id() @@ -676,26 +685,29 @@ percent <- -1 total <- nrow(var_meta) } - for (row in seq_len(nrow(var_meta))) { + rows <- seq_len(nrow(var_meta)) + if (PROCESS_SMOL_BATCH) { + + rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))] + } + cluster_row <- list() + for (row in rows) { if (show_percent && (row / total) * 100 > percent) { percent <- percent + 1 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) } - curent_var_meta <- var_meta[row, ] - - - set_feature_fields_from_var_meta(dummy_feature, curent_var_meta) - dummy_feature$set_featureID(next_feature_id) next_feature_id <- next_feature_id + 1 + + curent_var_meta <- var_meta[row, ] + set_feature_fields_from_var_meta(dummy_feature, curent_var_meta) fake_iso <- dummy_feature$get_iso() iso <- extract_iso(fake_iso) clusterID <- extract_clusterID(fake_iso, next_cluster_id) context$clusterID <- clusterID dummy_feature$set_iso(iso) - peak_list <- context$peaks[context$groupidx[[row]], ] if (! ("matrix" %in% class(peak_list))) { peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list))) @@ -709,21 +721,24 @@ ) } - sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE] - if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { - if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { - dummy_feature$set_int_o(int_o) - } - if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { - dummy_feature$set_int_b(int_b) - } - if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) { - dummy_feature$set_max_o(max_o) + if (!DEBUG_FAST_IGNORE_SLOW_OP) { + sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE] + if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { + if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { + dummy_feature$set_int_o(int_o) + } + if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { + dummy_feature$set_int_b(int_b) + } + if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) { + dummy_feature$set_max_o(max_o) + } } } - create_associated_cluster( + + cluster_row[[row]] <- create_associated_cluster( orm, - context$central_feature[[clusterID]], + context$samples[context$central_feature[[clusterID]]][[1]], dummy_feature, clusterID, context, curent_var_meta, next_pc_group, next_align_group @@ -732,10 +747,27 @@ features[[length(features)+1]] <- as.list(dummy_feature, field_names) dummy_feature$clear() } - message("")## +\n for previous message + rm(var_meta) + message("") message("Saving features") - rm(var_meta) invisible(dummy_feature$save(bulk=features)) + + ## We link manually clusters to the sample they're in. + link_cache <- list() + for (row in rows) { + sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"]) + for (sample_id in context$samples[sample_nos]) { + cluster_id <- cluster_row[[row]]$get_id() + if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) { + link_cache[[id]] <- 1 + orm$cluster_sample( + sample_id=sample_id, + cluster_id=cluster_id + )$save() + } + } + } + message("Saved.") return (context$clusters) } @@ -790,7 +822,7 @@ create_associated_cluster <- function( orm, - sample_no, feature, clusterID, + main_sample_id, feature, clusterID, context, curent_var_meta, next_pc_group, next_align_group ) { clusterID <- as.character(clusterID) @@ -817,8 +849,8 @@ ## Crappy hack to assign sample id to cluster without loading the sample. ## Samples are too big (their sample$env) and slows the process, and eat all the menory ## so we dont't want to load them. - cluster[["sample_id"]] <- context$samples[sample_no][[1]] - cluster$modified__[["sample_id"]] <- cluster[["sample_id"]] + cluster[["sample_id"]] <- main_sample_id + cluster$modified__[["sample_id"]] <- main_sample_id } else { if (context$clusterID != 0 && cluster$get_clusterID() == 0) { cluster$set_clusterID(context$clusterID) @@ -826,7 +858,7 @@ } cluster$save() feature$set_cluster(cluster) - return (feature) + return (cluster) } complete_features <- function(orm, clusters, show_percent) { diff -r 26f01380145d -r 00079fadc240 XSeekerPreparator.xml --- a/XSeekerPreparator.xml Wed Apr 07 13:28:09 2021 +0000 +++ b/XSeekerPreparator.xml Tue Jun 01 09:47:15 2021 +0000 @@ -1,6 +1,6 @@ Prepare RData file from CAMERA to be visualized in XSeeker @@ -12,7 +12,7 @@ bioconductor-xcms bioconductor-camera - git + git r-blob r-dbi r-fst @@ -20,7 +20,7 @@ r-stringr r-purrr r-rsqlite - r-dbmodelr + r-dbmodelr diff -r 26f01380145d -r 00079fadc240 data/models.R --- a/data/models.R Wed Apr 07 13:28:09 2021 +0000 +++ b/data/models.R Tue Jun 01 09:47:15 2021 +0000 @@ -34,13 +34,13 @@ score="FLOAT", deviation="FLOAT", status="TEXT", - adduct="TEXT", + # adduct="TEXT", curent_group="INTEGER", pc_group="INTEGER", align_group="INTEGER", xcms_group="INTEGER" ), - one=list("sample", "compound") + one=list("compound", "adduct", "sample") ), compound=DBModelR::ModelDefinition( table="compound", @@ -69,7 +69,8 @@ iso="TEXT", abundance="FLOAT" ), - one=list("cluster") + one=list("cluster"), + many=list("sample") ), instrument=DBModelR::ModelDefinition( table="instrument", diff -r 26f01380145d -r 00079fadc240 galaxy/tools/LC-MSMS/XSeekerPreparator.R --- a/galaxy/tools/LC-MSMS/XSeekerPreparator.R Wed Apr 07 13:28:09 2021 +0000 +++ b/galaxy/tools/LC-MSMS/XSeekerPreparator.R Tue Jun 01 09:47:15 2021 +0000 @@ -1,11 +1,16 @@ TOOL_NAME <- "XSeekerPreparator" -VERSION <- "1.2.1" +VERSION <- "1.2.2" + +DEBUG_FAST <- FALSE +DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST +PROCESS_SMOL_BATCH <- DEBUG_FAST +FAST_FEATURE_RATIO <- 10 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy" -ENRICHED_RDATA_VERSION <- paste("1.1.2", OUTPUT_SPECIFIC_TOOL, sep="-") +ENRICHED_RDATA_VERSION <- paste("1.2.2", OUTPUT_SPECIFIC_TOOL, sep="-") ENRICHED_RDATA_DOC <- sprintf(" Welcome to the enriched of the output of CAMERA/xcms. This doc was generated by the tool: %s - Version %s @@ -554,17 +559,20 @@ next } env <- new.env() - ms_file <- xcms::xcmsRaw(sample_path) - env$tic <- ms_file@tic - env$mz <- ms_file@env$mz - env$scanindex <- ms_file@scanindex - env$scantime <- ms_file@scantime * 60 - env$intensity <- ms_file@env$intensity - env$polarity <- as.character(ms_file@polarity[[1]]) - ## Again, ms file is huge, so we get rid of it quickly. - rm(ms_file) + if (!DEBUG_FAST_IGNORE_SLOW_OP) { + ms_file <- xcms::xcmsRaw(sample_path) + env$tic <- ms_file@tic + env$mz <- ms_file@env$mz + env$scanindex <- ms_file@scanindex + env$scantime <- ms_file@scantime * 60 + env$intensity <- ms_file@env$intensity + env$polarity <- as.character(ms_file@polarity[[1]]) + ## Again, ms file is huge, so we get rid of it quickly. + rm(ms_file) + + } env$sample_name <- sample_name env$dataset_path <- sample_path env$process_params <- process_params @@ -572,6 +580,7 @@ env$enriched_rdata_version <- ENRICHED_RDATA_VERSION env$tool_name <- TOOL_NAME env$enriched_rdata_doc <- ENRICHED_RDATA_DOC + sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id) rm (env) context$samples[no] <- sample$get_id() @@ -676,26 +685,29 @@ percent <- -1 total <- nrow(var_meta) } - for (row in seq_len(nrow(var_meta))) { + rows <- seq_len(nrow(var_meta)) + if (PROCESS_SMOL_BATCH) { + + rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))] + } + cluster_row <- list() + for (row in rows) { if (show_percent && (row / total) * 100 > percent) { percent <- percent + 1 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) } - curent_var_meta <- var_meta[row, ] - - - set_feature_fields_from_var_meta(dummy_feature, curent_var_meta) - dummy_feature$set_featureID(next_feature_id) next_feature_id <- next_feature_id + 1 + + curent_var_meta <- var_meta[row, ] + set_feature_fields_from_var_meta(dummy_feature, curent_var_meta) fake_iso <- dummy_feature$get_iso() iso <- extract_iso(fake_iso) clusterID <- extract_clusterID(fake_iso, next_cluster_id) context$clusterID <- clusterID dummy_feature$set_iso(iso) - peak_list <- context$peaks[context$groupidx[[row]], ] if (! ("matrix" %in% class(peak_list))) { peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list))) @@ -709,21 +721,24 @@ ) } - sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE] - if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { - if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { - dummy_feature$set_int_o(int_o) - } - if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { - dummy_feature$set_int_b(int_b) - } - if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) { - dummy_feature$set_max_o(max_o) + if (!DEBUG_FAST_IGNORE_SLOW_OP) { + sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE] + if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { + if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { + dummy_feature$set_int_o(int_o) + } + if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { + dummy_feature$set_int_b(int_b) + } + if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) { + dummy_feature$set_max_o(max_o) + } } } - create_associated_cluster( + + cluster_row[[row]] <- create_associated_cluster( orm, - context$central_feature[[clusterID]], + context$samples[context$central_feature[[clusterID]]][[1]], dummy_feature, clusterID, context, curent_var_meta, next_pc_group, next_align_group @@ -732,10 +747,27 @@ features[[length(features)+1]] <- as.list(dummy_feature, field_names) dummy_feature$clear() } - message("")## +\n for previous message + rm(var_meta) + message("") message("Saving features") - rm(var_meta) invisible(dummy_feature$save(bulk=features)) + + ## We link manually clusters to the sample they're in. + link_cache <- list() + for (row in rows) { + sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"]) + for (sample_id in context$samples[sample_nos]) { + cluster_id <- cluster_row[[row]]$get_id() + if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) { + link_cache[[id]] <- 1 + orm$cluster_sample( + sample_id=sample_id, + cluster_id=cluster_id + )$save() + } + } + } + message("Saved.") return (context$clusters) } @@ -790,7 +822,7 @@ create_associated_cluster <- function( orm, - sample_no, feature, clusterID, + main_sample_id, feature, clusterID, context, curent_var_meta, next_pc_group, next_align_group ) { clusterID <- as.character(clusterID) @@ -817,8 +849,8 @@ ## Crappy hack to assign sample id to cluster without loading the sample. ## Samples are too big (their sample$env) and slows the process, and eat all the menory ## so we dont't want to load them. - cluster[["sample_id"]] <- context$samples[sample_no][[1]] - cluster$modified__[["sample_id"]] <- cluster[["sample_id"]] + cluster[["sample_id"]] <- main_sample_id + cluster$modified__[["sample_id"]] <- main_sample_id } else { if (context$clusterID != 0 && cluster$get_clusterID() == 0) { cluster$set_clusterID(context$clusterID) @@ -826,7 +858,7 @@ } cluster$save() feature$set_cluster(cluster) - return (feature) + return (cluster) } complete_features <- function(orm, clusters, show_percent) { diff -r 26f01380145d -r 00079fadc240 galaxy/tools/LC-MSMS/XSeekerPreparator.xml --- a/galaxy/tools/LC-MSMS/XSeekerPreparator.xml Wed Apr 07 13:28:09 2021 +0000 +++ b/galaxy/tools/LC-MSMS/XSeekerPreparator.xml Tue Jun 01 09:47:15 2021 +0000 @@ -1,6 +1,6 @@ Prepare RData file from CAMERA to be visualized in XSeeker @@ -12,7 +12,7 @@ bioconductor-xcms bioconductor-camera - git + git r-blob r-dbi r-fst @@ -20,7 +20,7 @@ r-stringr r-purrr r-rsqlite - r-dbmodelr + r-dbmodelr