Mercurial > repos > lain > xseekerpreparator

--- a/XSeekerPreparator.R	Wed Apr 07 13:28:09 2021 +0000
+++ b/XSeekerPreparator.R	Tue Jun 01 09:47:15 2021 +0000
@@ -1,11 +1,16 @@


 TOOL_NAME <- "XSeekerPreparator"
-VERSION <- "1.2.1"
+VERSION <- "1.2.2"
+
+DEBUG_FAST <- FALSE
+DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST
+PROCESS_SMOL_BATCH <- DEBUG_FAST
+FAST_FEATURE_RATIO <- 10

 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy"

-ENRICHED_RDATA_VERSION <- paste("1.1.2", OUTPUT_SPECIFIC_TOOL, sep="-")
+ENRICHED_RDATA_VERSION <- paste("1.2.2", OUTPUT_SPECIFIC_TOOL, sep="-")
 ENRICHED_RDATA_DOC <- sprintf("
 Welcome to the enriched <Version %s> of the output of CAMERA/xcms.
 This doc was generated by the tool: %s - Version %s
@@ -554,17 +559,20 @@
             next
         }
         env <- new.env()
-        ms_file <- xcms::xcmsRaw(sample_path)
-        env$tic <- ms_file@tic
-        env$mz <- ms_file@env$mz
-        env$scanindex <- ms_file@scanindex
-        env$scantime <- ms_file@scantime * 60
-        env$intensity <- ms_file@env$intensity
-        env$polarity <- as.character(ms_file@polarity[[1]])

-        ## Again, ms file is huge, so we get rid of it quickly.
-        rm(ms_file)
+        if (!DEBUG_FAST_IGNORE_SLOW_OP) {
+            ms_file <- xcms::xcmsRaw(sample_path)
+            env$tic <- ms_file@tic
+            env$mz <- ms_file@env$mz
+            env$scanindex <- ms_file@scanindex
+            env$scantime <- ms_file@scantime * 60
+            env$intensity <- ms_file@env$intensity
+            env$polarity <- as.character(ms_file@polarity[[1]])

+            ## Again, ms file is huge, so we get rid of it quickly.
+            rm(ms_file)
+
+        }
         env$sample_name <- sample_name
         env$dataset_path <- sample_path
         env$process_params <- process_params
@@ -572,6 +580,7 @@
         env$enriched_rdata_version <- ENRICHED_RDATA_VERSION
         env$tool_name <- TOOL_NAME
         env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
+
         sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id)
         rm (env)
         context$samples[no] <- sample$get_id()
@@ -676,26 +685,29 @@
         percent <- -1
         total <- nrow(var_meta)
     }
-    for (row in seq_len(nrow(var_meta))) {
+    rows <- seq_len(nrow(var_meta))
+    if (PROCESS_SMOL_BATCH) {
+
+        rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))]
+    }
+    cluster_row <- list()
+    for (row in rows) {
         if (show_percent && (row / total) * 100 > percent) {
             percent <- percent + 1
             message("\r", sprintf("\r%d %%", percent), appendLF=FALSE)
         }

-        curent_var_meta <- var_meta[row, ]
-
-
-        set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
-
         dummy_feature$set_featureID(next_feature_id)
         next_feature_id <- next_feature_id + 1
+
+        curent_var_meta <- var_meta[row, ]
+        set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
         fake_iso <- dummy_feature$get_iso()
         iso <- extract_iso(fake_iso)
         clusterID <- extract_clusterID(fake_iso, next_cluster_id)
         context$clusterID <- clusterID
         dummy_feature$set_iso(iso)

-
         peak_list <- context$peaks[context$groupidx[[row]], ]
         if (! ("matrix" %in% class(peak_list))) {
             peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list)))
@@ -709,21 +721,24 @@
             )
         }

-        sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE]
-        if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
-            if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
-                dummy_feature$set_int_o(int_o)
-            }
-            if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
-                dummy_feature$set_int_b(int_b)
-            }
-            if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
-                dummy_feature$set_max_o(max_o)
+        if (!DEBUG_FAST_IGNORE_SLOW_OP) {
+            sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE]
+            if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
+                if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
+                    dummy_feature$set_int_o(int_o)
+                }
+                if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
+                    dummy_feature$set_int_b(int_b)
+                }
+                if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
+                    dummy_feature$set_max_o(max_o)
+                }
             }
         }
-        create_associated_cluster(
+
+        cluster_row[[row]] <- create_associated_cluster(
             orm,
-            context$central_feature[[clusterID]],
+            context$samples[context$central_feature[[clusterID]]][[1]],
             dummy_feature, clusterID,
             context, curent_var_meta, next_pc_group,
             next_align_group
@@ -732,10 +747,27 @@
         features[[length(features)+1]] <- as.list(dummy_feature, field_names)
         dummy_feature$clear()
     }
-    message("")## +\n for previous message
+    rm(var_meta)
+    message("")
     message("Saving features")
-    rm(var_meta)
     invisible(dummy_feature$save(bulk=features))
+
+    ## We link manually clusters to the sample they're in.
+    link_cache <- list()
+    for (row in rows) {
+        sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"])
+        for (sample_id in context$samples[sample_nos]) {
+            cluster_id <- cluster_row[[row]]$get_id()
+            if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) {
+                link_cache[[id]] <- 1
+                orm$cluster_sample(
+                    sample_id=sample_id,
+                    cluster_id=cluster_id
+                )$save()
+            }
+        }
+    }
+
     message("Saved.")
     return (context$clusters)
 }
@@ -790,7 +822,7 @@

 create_associated_cluster <- function(
     orm,
-    sample_no, feature, clusterID,
+    main_sample_id, feature, clusterID,
     context, curent_var_meta, next_pc_group, next_align_group
 ) {
     clusterID <- as.character(clusterID)
@@ -817,8 +849,8 @@
         ## Crappy hack to assign sample id to cluster without loading the sample.
         ## Samples are too big (their sample$env) and slows the process, and eat all the menory
         ## so we dont't want to load them.
-        cluster[["sample_id"]] <- context$samples[sample_no][[1]]
-        cluster$modified__[["sample_id"]] <- cluster[["sample_id"]]
+        cluster[["sample_id"]] <- main_sample_id
+        cluster$modified__[["sample_id"]] <- main_sample_id
     } else {
         if (context$clusterID != 0 && cluster$get_clusterID() == 0) {
             cluster$set_clusterID(context$clusterID)
@@ -826,7 +858,7 @@
     }
     cluster$save()
     feature$set_cluster(cluster)
-    return (feature)
+    return (cluster)
 }

 complete_features <- function(orm, clusters, show_percent) {
--- a/XSeekerPreparator.xml	Wed Apr 07 13:28:09 2021 +0000
+++ b/XSeekerPreparator.xml	Tue Jun 01 09:47:15 2021 +0000
@@ -1,6 +1,6 @@
 <tool id="xseeker_preparator"
       name="XSeeker Preparator"
-      version="1.2.1"
+      version="1.2.2"
 >
     <description>Prepare RData file from CAMERA to be visualized in XSeeker</description>

@@ -12,7 +12,7 @@
     <requirements>
         <requirement type="package" version="3.12.0">bioconductor-xcms</requirement>
         <requirement type="package">bioconductor-camera</requirement>
-        <requirement type="package" version="2.29.2">git</requirement>
+        <requirement type="package" version="2.29.2">git</requirement>
         <requirement type="package" version="1.2.1">r-blob</requirement>
         <requirement type="package" version="1.1.0">r-dbi</requirement>
         <requirement type="package" version="0.9.4">r-fst</requirement>
@@ -20,7 +20,7 @@
         <requirement type="package" version="1.4.0">r-stringr</requirement>
         <requirement type="package" version="0.3.4">r-purrr</requirement>
         <requirement type="package" version="2.2.1">r-rsqlite</requirement>
-        <requirement type="package" version="0.1.0">r-dbmodelr</requirement>
+        <requirement type="package" version="0.2.0">r-dbmodelr</requirement>
     </requirements>
     <stdio>
--- a/data/models.R	Wed Apr 07 13:28:09 2021 +0000
+++ b/data/models.R	Tue Jun 01 09:47:15 2021 +0000
@@ -34,13 +34,13 @@
             score="FLOAT",
             deviation="FLOAT",
             status="TEXT",
-            adduct="TEXT",
+            # adduct="TEXT",
             curent_group="INTEGER",
             pc_group="INTEGER",
             align_group="INTEGER",
             xcms_group="INTEGER"
         ),
-        one=list("sample", "compound")
+        one=list("compound", "adduct", "sample")
     ),
     compound=DBModelR::ModelDefinition(
         table="compound",
@@ -69,7 +69,8 @@
             iso="TEXT",
             abundance="FLOAT"
         ),
-        one=list("cluster")
+        one=list("cluster"),
+        many=list("sample")
     ),
     instrument=DBModelR::ModelDefinition(
         table="instrument",
--- a/galaxy/tools/LC-MSMS/XSeekerPreparator.R	Wed Apr 07 13:28:09 2021 +0000
+++ b/galaxy/tools/LC-MSMS/XSeekerPreparator.R	Tue Jun 01 09:47:15 2021 +0000
@@ -1,11 +1,16 @@


 TOOL_NAME <- "XSeekerPreparator"
-VERSION <- "1.2.1"
+VERSION <- "1.2.2"
+
+DEBUG_FAST <- FALSE
+DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST
+PROCESS_SMOL_BATCH <- DEBUG_FAST
+FAST_FEATURE_RATIO <- 10

 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy"

-ENRICHED_RDATA_VERSION <- paste("1.1.2", OUTPUT_SPECIFIC_TOOL, sep="-")
+ENRICHED_RDATA_VERSION <- paste("1.2.2", OUTPUT_SPECIFIC_TOOL, sep="-")
 ENRICHED_RDATA_DOC <- sprintf("
 Welcome to the enriched <Version %s> of the output of CAMERA/xcms.
 This doc was generated by the tool: %s - Version %s
@@ -554,17 +559,20 @@
             next
         }
         env <- new.env()
-        ms_file <- xcms::xcmsRaw(sample_path)
-        env$tic <- ms_file@tic
-        env$mz <- ms_file@env$mz
-        env$scanindex <- ms_file@scanindex
-        env$scantime <- ms_file@scantime * 60
-        env$intensity <- ms_file@env$intensity
-        env$polarity <- as.character(ms_file@polarity[[1]])

-        ## Again, ms file is huge, so we get rid of it quickly.
-        rm(ms_file)
+        if (!DEBUG_FAST_IGNORE_SLOW_OP) {
+            ms_file <- xcms::xcmsRaw(sample_path)
+            env$tic <- ms_file@tic
+            env$mz <- ms_file@env$mz
+            env$scanindex <- ms_file@scanindex
+            env$scantime <- ms_file@scantime * 60
+            env$intensity <- ms_file@env$intensity
+            env$polarity <- as.character(ms_file@polarity[[1]])

+            ## Again, ms file is huge, so we get rid of it quickly.
+            rm(ms_file)
+
+        }
         env$sample_name <- sample_name
         env$dataset_path <- sample_path
         env$process_params <- process_params
@@ -572,6 +580,7 @@
         env$enriched_rdata_version <- ENRICHED_RDATA_VERSION
         env$tool_name <- TOOL_NAME
         env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
+
         sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id)
         rm (env)
         context$samples[no] <- sample$get_id()
@@ -676,26 +685,29 @@
         percent <- -1
         total <- nrow(var_meta)
     }
-    for (row in seq_len(nrow(var_meta))) {
+    rows <- seq_len(nrow(var_meta))
+    if (PROCESS_SMOL_BATCH) {
+
+        rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))]
+    }
+    cluster_row <- list()
+    for (row in rows) {
         if (show_percent && (row / total) * 100 > percent) {
             percent <- percent + 1
             message("\r", sprintf("\r%d %%", percent), appendLF=FALSE)
         }

-        curent_var_meta <- var_meta[row, ]
-
-
-        set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
-
         dummy_feature$set_featureID(next_feature_id)
         next_feature_id <- next_feature_id + 1
+
+        curent_var_meta <- var_meta[row, ]
+        set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
         fake_iso <- dummy_feature$get_iso()
         iso <- extract_iso(fake_iso)
         clusterID <- extract_clusterID(fake_iso, next_cluster_id)
         context$clusterID <- clusterID
         dummy_feature$set_iso(iso)

-
         peak_list <- context$peaks[context$groupidx[[row]], ]
         if (! ("matrix" %in% class(peak_list))) {
             peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list)))
@@ -709,21 +721,24 @@
             )
         }

-        sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE]
-        if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
-            if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
-                dummy_feature$set_int_o(int_o)
-            }
-            if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
-                dummy_feature$set_int_b(int_b)
-            }
-            if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
-                dummy_feature$set_max_o(max_o)
+        if (!DEBUG_FAST_IGNORE_SLOW_OP) {
+            sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE]
+            if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
+                if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
+                    dummy_feature$set_int_o(int_o)
+                }
+                if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
+                    dummy_feature$set_int_b(int_b)
+                }
+                if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
+                    dummy_feature$set_max_o(max_o)
+                }
             }
         }
-        create_associated_cluster(
+
+        cluster_row[[row]] <- create_associated_cluster(
             orm,
-            context$central_feature[[clusterID]],
+            context$samples[context$central_feature[[clusterID]]][[1]],
             dummy_feature, clusterID,
             context, curent_var_meta, next_pc_group,
             next_align_group
@@ -732,10 +747,27 @@
         features[[length(features)+1]] <- as.list(dummy_feature, field_names)
         dummy_feature$clear()
     }
-    message("")## +\n for previous message
+    rm(var_meta)
+    message("")
     message("Saving features")
-    rm(var_meta)
     invisible(dummy_feature$save(bulk=features))
+
+    ## We link manually clusters to the sample they're in.
+    link_cache <- list()
+    for (row in rows) {
+        sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"])
+        for (sample_id in context$samples[sample_nos]) {
+            cluster_id <- cluster_row[[row]]$get_id()
+            if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) {
+                link_cache[[id]] <- 1
+                orm$cluster_sample(
+                    sample_id=sample_id,
+                    cluster_id=cluster_id
+                )$save()
+            }
+        }
+    }
+
     message("Saved.")
     return (context$clusters)
 }
@@ -790,7 +822,7 @@

 create_associated_cluster <- function(
     orm,
-    sample_no, feature, clusterID,
+    main_sample_id, feature, clusterID,
     context, curent_var_meta, next_pc_group, next_align_group
 ) {
     clusterID <- as.character(clusterID)
@@ -817,8 +849,8 @@
         ## Crappy hack to assign sample id to cluster without loading the sample.
         ## Samples are too big (their sample$env) and slows the process, and eat all the menory
         ## so we dont't want to load them.
-        cluster[["sample_id"]] <- context$samples[sample_no][[1]]
-        cluster$modified__[["sample_id"]] <- cluster[["sample_id"]]
+        cluster[["sample_id"]] <- main_sample_id
+        cluster$modified__[["sample_id"]] <- main_sample_id
     } else {
         if (context$clusterID != 0 && cluster$get_clusterID() == 0) {
             cluster$set_clusterID(context$clusterID)
@@ -826,7 +858,7 @@
     }
     cluster$save()
     feature$set_cluster(cluster)
-    return (feature)
+    return (cluster)
 }

 complete_features <- function(orm, clusters, show_percent) {
--- a/galaxy/tools/LC-MSMS/XSeekerPreparator.xml	Wed Apr 07 13:28:09 2021 +0000
+++ b/galaxy/tools/LC-MSMS/XSeekerPreparator.xml	Tue Jun 01 09:47:15 2021 +0000
@@ -1,6 +1,6 @@
 <tool id="xseeker_preparator"
       name="XSeeker Preparator"
-      version="1.2.1"
+      version="1.2.2"
 >
     <description>Prepare RData file from CAMERA to be visualized in XSeeker</description>

@@ -12,7 +12,7 @@
     <requirements>
         <requirement type="package" version="3.12.0">bioconductor-xcms</requirement>
         <requirement type="package">bioconductor-camera</requirement>
-        <requirement type="package" version="2.29.2">git</requirement>
+        <requirement type="package" version="2.29.2">git</requirement>
         <requirement type="package" version="1.2.1">r-blob</requirement>
         <requirement type="package" version="1.1.0">r-dbi</requirement>
         <requirement type="package" version="0.9.4">r-fst</requirement>
@@ -20,7 +20,7 @@
         <requirement type="package" version="1.4.0">r-stringr</requirement>
         <requirement type="package" version="0.3.4">r-purrr</requirement>
         <requirement type="package" version="2.2.1">r-rsqlite</requirement>
-        <requirement type="package" version="0.1.0">r-dbmodelr</requirement>
+        <requirement type="package" version="0.2.0">r-dbmodelr</requirement>
     </requirements>
     <stdio>