Mercurial > repos > recetox > recetox_aplcms_align_features
changeset 10:2b6bfa234d8b draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 97249a1af94ac5c387e1ede274dec5364f71cde9
author | recetox |
---|---|
date | Wed, 11 Oct 2023 11:19:39 +0000 |
parents | 7b42ee54d757 |
children | d5c638b72ce4 |
files | macros.xml recetox_aplcms_align_features.xml test-data/peak_table_galaxy.parquet utils.R |
diffstat | 4 files changed, 81 insertions(+), 65 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Wed Jul 19 00:27:03 2023 +0000 +++ b/macros.xml Wed Oct 11 11:19:39 2023 +0000 @@ -13,6 +13,9 @@ <edam_topic>topic_0091</edam_topic> <edam_topic>topic_3520</edam_topic> </edam_topics> + </xml> + + <xml name="refs"> <xrefs> <xref type="bio.tools">recetox-aplcms</xref> </xrefs> @@ -81,14 +84,18 @@ help="The upper limit of the ratio range between the left-standard deviation and the right-standard deviation of the bi-Gaussian function to fit the data." /> </section> <conditional name="sd_cut"> - <param name="sd_cut_bounds" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Standard deviations boundaries." - help="Limit the standard deviations by setting boundaries." /> + <param name="sd_cut_bounds" type="select" label="Standard deviations boundaries." + help="Limit the standard deviations by setting boundaries."> + <option value="FALSE">FALSE</option> + <option value="TRUE" selected="true">TRUE</option> + </param> <when value="TRUE"> <param name="sd_cut_min" type="float" value="0.01" label="Minimal standard deviation" help="The minimum standard deviation - features with a standard deviation lower than this number are eliminated." /> <param name="sd_cut_max" type="float" value="500" label="Maximal standard deviation" help="The maximum standard deviation - features with a standard deviation greater than this number are eliminated." /> </when> + <when value="FALSE"></when> </conditional> <conditional name="peak_estim"> <param name="peak_estim_method" type="select" display="radio" label="Peak estimation method" @@ -100,6 +107,7 @@ <param name="moment_power" type="float" value="1" label="Moment power" help="The power parameter for data transformation when fitting the bi-Gaussian mixture model in an EIC." /> </when> + <when value="EM"></when> </conditional> </section> </xml>
--- a/recetox_aplcms_align_features.xml Wed Jul 19 00:27:03 2023 +0000 +++ b/recetox_aplcms_align_features.xml Wed Oct 11 11:19:39 2023 +0000 @@ -1,14 +1,14 @@ -<tool id="recetox_aplcms_align_features" name="recetox-aplcms - align features" version="@TOOL_VERSION@+galaxy1" profile="21.09"> +<tool id="recetox_aplcms_align_features" name="recetox-aplcms - align features" version="@TOOL_VERSION@+galaxy2" profile="21.09"> <description>align peaks across samples</description> <macros> <import>macros.xml</import> <import>help.xml</import> </macros> - + <expand macro="annotation"/> <edam_operations> <edam_operation>operation_2928</edam_operation> </edam_operations> - <expand macro="annotation"/> + <expand macro="refs"/> <expand macro="creator"/> <expand macro="requirements"/> @@ -56,7 +56,7 @@ </outputs> <tests> - + <test expect_failure="true" /> </tests> <help>
--- a/utils.R Wed Jul 19 00:27:03 2023 +0000 +++ b/utils.R Wed Oct 11 11:19:39 2023 +0000 @@ -1,94 +1,98 @@ library(recetox.aplcms) get_env_sample_name <- function() { - sample_name <- Sys.getenv("SAMPLE_NAME", unset = NA) - if (nchar(sample_name) == 0) { - sample_name <- NA - } - if (is.na(sample_name)) { - message("The mzML file does not contain run ID.") - } - return(sample_name) + sample_name <- Sys.getenv("SAMPLE_NAME", unset = NA) + if (nchar(sample_name) == 0) { + sample_name <- NA + } + if (is.na(sample_name)) { + message("The mzML file does not contain run ID.") + } + return(sample_name) } save_sample_name <- function(df, sample_name) { - attr(df, "sample_name") <- sample_name - return(df) + attr(df, "sample_name") <- sample_name + return(df) } restore_sample_name <- function(df) { - return(df$sample_id[1]) + return(df$sample_id[1]) } load_sample_name <- function(df) { - sample_name <- attr(df, "sample_name") - if (is.null(sample_name)) { - return(NA) - } else { - return(sample_name) - } + sample_name <- attr(df, "sample_name") + if (is.null(sample_name)) { + return(NA) + } else { + return(sample_name) + } } save_data_as_parquet_file <- function(data, filename) { - arrow::write_parquet(data, filename) + arrow::write_parquet(data, filename) } load_data_from_parquet_file <- function(filename) { - return(arrow::read_parquet(filename)) + return(arrow::read_parquet(filename)) } load_parquet_collection <- function(files) { - features <- lapply(files, arrow::read_parquet) - features <- lapply(features, tibble::as_tibble) - return(features) + features <- lapply(files, arrow::read_parquet) + features <- lapply(features, tibble::as_tibble) + return(features) } save_parquet_collection <- function(feature_tables, sample_names, subdir) { - dir.create(subdir) - for (i in seq_len(length(feature_tables))) { - filename <- file.path(subdir, paste0(sample_names[i], ".parquet")) - feature_table <- as.data.frame(feature_tables[[i]]) - feature_table <- save_sample_name(feature_table, sample_names[i]) - arrow::write_parquet(feature_table, filename) - } + dir.create(subdir) + for (i in seq_len(length(feature_tables))) { + filename <- file.path(subdir, paste0(sample_names[i], ".parquet")) + feature_table <- as.data.frame(feature_tables[[i]]) + feature_table <- save_sample_name(feature_table, sample_names[i]) + arrow::write_parquet(feature_table, filename) + } } sort_by_sample_name <- function(tables, sample_names) { - return(tables[order(sample_names)]) + return(tables[order(sample_names)]) } save_tolerances <- function(table, tol_file) { - mz_tolerance <- c(table$mz_tol_relative) - rt_tolerance <- c(table$rt_tol_relative) - arrow::write_parquet(data.frame(mz_tolerance, rt_tolerance), tol_file) + mz_tolerance <- c(table$mz_tol_relative) + rt_tolerance <- c(table$rt_tol_relative) + arrow::write_parquet(data.frame(mz_tolerance, rt_tolerance), tol_file) } save_aligned_features <- function(aligned_features, metadata_file, rt_file, intensity_file) { - save_data_as_parquet_file(aligned_features$metadata, metadata_file) - save_data_as_parquet_file(aligned_features$rt, rt_file) - save_data_as_parquet_file(aligned_features$intensity, intensity_file) + save_data_as_parquet_file(aligned_features$metadata, metadata_file) + save_data_as_parquet_file(aligned_features$rt, rt_file) + save_data_as_parquet_file(aligned_features$intensity, intensity_file) } select_table_with_sample_name <- function(tables, sample_name) { - sample_names <- lapply(tables, load_sample_name) - index <- which(sample_names == sample_name) - if (length(index) > 0) { - return(tables[[index]]) - } else { - stop(sprintf("Mismatch - sample name '%s' not present in %s", - sample_name, paste(sample_names, collapse = ", "))) - } + sample_names <- lapply(tables, load_sample_name) + index <- which(sample_names == sample_name) + if (length(index) > 0) { + return(tables[[index]]) + } else { + stop(sprintf( + "Mismatch - sample name '%s' not present in %s", + sample_name, paste(sample_names, collapse = ", ") + )) + } } select_adjusted <- function(recovered_features) { - return(recovered_features$adjusted_features) + return(recovered_features$adjusted_features) } known_table_columns <- function() { - c("chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type", + c( + "chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type", "m.z", "Number_profiles_processed", "Percent_found", "mz_min", "mz_max", "RT_mean", "RT_sd", "RT_min", "RT_max", "int_mean(log)", "int_sd(log)", - "int_min(log)", "int_max(log)") + "int_min(log)", "int_max(log)" + ) } save_known_table <- function(table, filename) { @@ -101,7 +105,9 @@ } save_pairing <- function(table, filename) { - df <- table$pairing %>% as_tibble() %>% setNames(c("new", "old")) + df <- table$pairing %>% + as_tibble() %>% + setNames(c("new", "old")) arrow::write_parquet(df, filename) } @@ -114,18 +120,20 @@ } validate_sample_names <- function(sample_names) { - if ((any(is.na(sample_names))) || (length(unique(sample_names)) != length(sample_names))) { - stop(sprintf("Sample names absent or not unique - provided sample names: %s", - paste(sample_names, collapse = ", "))) - } + if ((any(is.na(sample_names))) || (length(unique(sample_names)) != length(sample_names))) { + stop(sprintf( + "Sample names absent or not unique - provided sample names: %s", + paste(sample_names, collapse = ", ") + )) + } } determine_sigma_ratios <- function(sigma_ratio_lim_min = NA, sigma_ratio_lim_max = NA) { - if (is.na(sigma_ratio_lim_min)) { - sigma_ratio_lim_min <- 0 - } - if (is.na(sigma_ratio_lim_max)) { - sigma_ratio_lim_max <- Inf - } - return(c(sigma_ratio_lim_min, sigma_ratio_lim_max)) + if (is.na(sigma_ratio_lim_min)) { + sigma_ratio_lim_min <- 0 + } + if (is.na(sigma_ratio_lim_max)) { + sigma_ratio_lim_max <- Inf + } + return(c(sigma_ratio_lim_min, sigma_ratio_lim_max)) }