Mercurial > repos > recetox > recetox_aplcms_compute_clusters

--- a/macros.xml	Wed Jul 19 00:27:18 2023 +0000
+++ b/macros.xml	Wed Oct 11 11:17:39 2023 +0000
@@ -13,6 +13,9 @@
               <edam_topic>topic_0091</edam_topic>
               <edam_topic>topic_3520</edam_topic>
        </edam_topics>
+    </xml>
+
+    <xml name="refs">
        <xrefs>
               <xref type="bio.tools">recetox-aplcms</xref>
        </xrefs>
@@ -81,14 +84,18 @@
                         help="The upper limit of the ratio range between the left-standard deviation and the right-standard deviation of the bi-Gaussian function to fit the data." />
             </section>
             <conditional name="sd_cut">
-                <param name="sd_cut_bounds" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Standard deviations boundaries."
-                       help="Limit the standard deviations by setting boundaries." />
+                <param name="sd_cut_bounds" type="select" label="Standard deviations boundaries."
+                       help="Limit the standard deviations by setting boundaries.">
+                     <option value="FALSE">FALSE</option>
+                     <option value="TRUE" selected="true">TRUE</option>
+                </param>
                 <when value="TRUE">
                     <param name="sd_cut_min" type="float" value="0.01" label="Minimal standard deviation"
                            help="The minimum standard deviation - features with a standard deviation lower than this number are eliminated." />
                     <param name="sd_cut_max" type="float" value="500" label="Maximal standard deviation"
                             help="The maximum standard deviation - features with a standard deviation greater than this number are eliminated." />
                 </when>
+                <when value="FALSE"></when>
             </conditional>
             <conditional name="peak_estim">
                 <param name="peak_estim_method" type="select" display="radio" label="Peak estimation method"
@@ -100,6 +107,7 @@
                     <param name="moment_power" type="float" value="1" label="Moment power"
                            help="The power parameter for data transformation when fitting the bi-Gaussian mixture model in an EIC." />
                 </when>
+                <when value="EM"></when>
             </conditional>
         </section>
     </xml>
--- a/recetox_aplcms_compute_clusters.xml	Wed Jul 19 00:27:18 2023 +0000
+++ b/recetox_aplcms_compute_clusters.xml	Wed Oct 11 11:17:39 2023 +0000
@@ -1,13 +1,14 @@
-<tool id="recetox_aplcms_compute_clusters" name="recetox-aplcms - compute clusters" version="@TOOL_VERSION@+galaxy2" profile="21.09">
+<tool id="recetox_aplcms_compute_clusters" name="recetox-aplcms - compute clusters" version="@TOOL_VERSION@+galaxy3" profile="21.09">
     <description>compute clusters of mz and rt across samples and assign cluster IDs to individual features</description>
     <macros>
         <import>macros.xml</import>
         <import>help.xml</import>
     </macros>
+    <expand macro="annotation"/>
     <edam_operations>
         <edam_operation>operation_2928</edam_operation>
     </edam_operations>
-    <expand macro="annotation"/>
+    <expand macro="refs"/>
     <expand macro="creator"/>
     <expand macro="requirements"/>

@@ -52,7 +53,7 @@
     </outputs>

     <tests>
-
+        <test expect_failure="true" />
     </tests>

     <help>
Binary file test-data/peak_table_galaxy.parquet has changed
--- a/utils.R	Wed Jul 19 00:27:18 2023 +0000
+++ b/utils.R	Wed Oct 11 11:17:39 2023 +0000
@@ -1,94 +1,98 @@
 library(recetox.aplcms)

 get_env_sample_name <- function() {
-    sample_name <- Sys.getenv("SAMPLE_NAME", unset = NA)
-    if (nchar(sample_name) == 0) {
-        sample_name <- NA
-    }
-    if (is.na(sample_name)) {
-        message("The mzML file does not contain run ID.")
-    }
-    return(sample_name)
+  sample_name <- Sys.getenv("SAMPLE_NAME", unset = NA)
+  if (nchar(sample_name) == 0) {
+    sample_name <- NA
+  }
+  if (is.na(sample_name)) {
+    message("The mzML file does not contain run ID.")
+  }
+  return(sample_name)
 }

 save_sample_name <- function(df, sample_name) {
-    attr(df, "sample_name") <- sample_name
-    return(df)
+  attr(df, "sample_name") <- sample_name
+  return(df)
 }

 restore_sample_name <- function(df) {
-    return(df$sample_id[1])
+  return(df$sample_id[1])
 }

 load_sample_name <- function(df) {
-    sample_name <- attr(df, "sample_name")
-    if (is.null(sample_name)) {
-        return(NA)
-    } else {
-        return(sample_name)
-    }
+  sample_name <- attr(df, "sample_name")
+  if (is.null(sample_name)) {
+    return(NA)
+  } else {
+    return(sample_name)
+  }
 }

 save_data_as_parquet_file <- function(data, filename) {
-    arrow::write_parquet(data, filename)
+  arrow::write_parquet(data, filename)
 }

 load_data_from_parquet_file <- function(filename) {
-    return(arrow::read_parquet(filename))
+  return(arrow::read_parquet(filename))
 }

 load_parquet_collection <- function(files) {
-    features <- lapply(files, arrow::read_parquet)
-    features <- lapply(features, tibble::as_tibble)
-    return(features)
+  features <- lapply(files, arrow::read_parquet)
+  features <- lapply(features, tibble::as_tibble)
+  return(features)
 }

 save_parquet_collection <- function(feature_tables, sample_names, subdir) {
-    dir.create(subdir)
-    for (i in seq_len(length(feature_tables))) {
-      filename <- file.path(subdir, paste0(sample_names[i], ".parquet"))
-      feature_table <- as.data.frame(feature_tables[[i]])
-      feature_table <- save_sample_name(feature_table, sample_names[i])
-      arrow::write_parquet(feature_table, filename)
-    }
+  dir.create(subdir)
+  for (i in seq_len(length(feature_tables))) {
+    filename <- file.path(subdir, paste0(sample_names[i], ".parquet"))
+    feature_table <- as.data.frame(feature_tables[[i]])
+    feature_table <- save_sample_name(feature_table, sample_names[i])
+    arrow::write_parquet(feature_table, filename)
+  }
 }

 sort_by_sample_name <- function(tables, sample_names) {
-    return(tables[order(sample_names)])
+  return(tables[order(sample_names)])
 }

 save_tolerances <- function(table, tol_file) {
-    mz_tolerance <- c(table$mz_tol_relative)
-    rt_tolerance <- c(table$rt_tol_relative)
-    arrow::write_parquet(data.frame(mz_tolerance, rt_tolerance), tol_file)
+  mz_tolerance <- c(table$mz_tol_relative)
+  rt_tolerance <- c(table$rt_tol_relative)
+  arrow::write_parquet(data.frame(mz_tolerance, rt_tolerance), tol_file)
 }

 save_aligned_features <- function(aligned_features, metadata_file, rt_file, intensity_file) {
-    save_data_as_parquet_file(aligned_features$metadata, metadata_file)
-    save_data_as_parquet_file(aligned_features$rt, rt_file)
-    save_data_as_parquet_file(aligned_features$intensity, intensity_file)
+  save_data_as_parquet_file(aligned_features$metadata, metadata_file)
+  save_data_as_parquet_file(aligned_features$rt, rt_file)
+  save_data_as_parquet_file(aligned_features$intensity, intensity_file)
 }

 select_table_with_sample_name <- function(tables, sample_name) {
-    sample_names <- lapply(tables, load_sample_name)
-    index <- which(sample_names == sample_name)
-    if (length(index) > 0) {
-        return(tables[[index]])
-    } else {
-        stop(sprintf("Mismatch - sample name '%s' not present in %s",
-                     sample_name, paste(sample_names, collapse = ", ")))
-    }
+  sample_names <- lapply(tables, load_sample_name)
+  index <- which(sample_names == sample_name)
+  if (length(index) > 0) {
+    return(tables[[index]])
+  } else {
+    stop(sprintf(
+      "Mismatch - sample name '%s' not present in %s",
+      sample_name, paste(sample_names, collapse = ", ")
+    ))
+  }
 }

 select_adjusted <- function(recovered_features) {
-    return(recovered_features$adjusted_features)
+  return(recovered_features$adjusted_features)
 }

 known_table_columns <- function() {
-  c("chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type",
+  c(
+    "chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type",
     "m.z", "Number_profiles_processed", "Percent_found", "mz_min", "mz_max",
     "RT_mean", "RT_sd", "RT_min", "RT_max", "int_mean(log)", "int_sd(log)",
-    "int_min(log)", "int_max(log)")
+    "int_min(log)", "int_max(log)"
+  )
 }

 save_known_table <- function(table, filename) {
@@ -101,7 +105,9 @@
 }

 save_pairing <- function(table, filename) {
-  df <- table$pairing %>% as_tibble() %>% setNames(c("new", "old"))
+  df <- table$pairing %>%
+    as_tibble() %>%
+    setNames(c("new", "old"))
   arrow::write_parquet(df, filename)
 }

@@ -114,18 +120,20 @@
 }

 validate_sample_names <- function(sample_names) {
-    if ((any(is.na(sample_names))) || (length(unique(sample_names)) != length(sample_names))) {
-        stop(sprintf("Sample names absent or not unique - provided sample names: %s",
-                     paste(sample_names, collapse = ", ")))
-    }
+  if ((any(is.na(sample_names))) || (length(unique(sample_names)) != length(sample_names))) {
+    stop(sprintf(
+      "Sample names absent or not unique - provided sample names: %s",
+      paste(sample_names, collapse = ", ")
+    ))
+  }
 }

 determine_sigma_ratios <- function(sigma_ratio_lim_min = NA, sigma_ratio_lim_max = NA) {
-    if (is.na(sigma_ratio_lim_min)) {
-        sigma_ratio_lim_min <- 0
-    }
-    if (is.na(sigma_ratio_lim_max)) {
-        sigma_ratio_lim_max <- Inf
-    }
-    return(c(sigma_ratio_lim_min, sigma_ratio_lim_max))
+  if (is.na(sigma_ratio_lim_min)) {
+    sigma_ratio_lim_min <- 0
+  }
+  if (is.na(sigma_ratio_lim_max)) {
+    sigma_ratio_lim_max <- Inf
+  }
+  return(c(sigma_ratio_lim_min, sigma_ratio_lim_max))
 }