Mercurial > repos > recetox > waveica
changeset 7:1a2aeb8137bf draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 013d7c85fa9d77b8a27d194b350cd6b2d127a80f
author | recetox |
---|---|
date | Thu, 06 Jun 2024 12:25:05 +0000 |
parents | 071a424241ec |
children | bf32ae95a06f |
files | macros.xml test-data/test10_output1.tsv test-data/test10_output2.tsv test-data/test9_output1.parquet test-data/test9_output2.parquet waveica.xml waveica_wrapper.R |
diffstat | 7 files changed, 129 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Thu May 30 14:54:02 2024 +0000 +++ b/macros.xml Thu Jun 06 12:25:05 2024 +0000 @@ -96,10 +96,20 @@ </when> </conditional> </xml> + <xml name="split_output"> + <param name = "keep_two_output" label="Output metadata and data matrix as separate files" type="boolean" checked="false" + truevalue="TRUE" falsevalue="FALSE" help="Keep two output files, one being the data matrix (feature table) and the second being the metadata table." /> + </xml> <xml name="outputs"> <outputs> - <data name="normalized_data" format="tsv"> + <data name="normalized_data" format="tabular" label="Normalized table of ${on_string}"> + <change_format> + <when input_dataset="data" attribute="ext" value="parquet" format="parquet" /> + </change_format> + </data> + <data name="metadata" format="tabular" label="Metadata table of ${on_string}"> + <filter>keep_two_output</filter> <change_format> <when input_dataset="data" attribute="ext" value="parquet" format="parquet" /> </change_format>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test10_output1.tsv Thu Jun 06 12:25:05 2024 +0000 @@ -0,0 +1,5 @@ +id VT_160120_002 VT_160120_004 VT_160120_006 VT_160120_008 VT_160120_010 +M85T34 355200.506508035 216897.826587868 362337.195084504 143303.377379009 189065.516447239 +M86T41 75115889.9077485 75204863.1495248 76490295.1450204 83771659.9549148 84108898.7658797 +M86T518 6101488.54615418 6170882.26270475 12588041.969092 6181538.46316058 6103964.42378424 +M86T539 2007379.02604984 2069979.64992079 1818589.63912375 1975712.25920485 1935671.32085241
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test10_output2.tsv Thu Jun 06 12:25:05 2024 +0000 @@ -0,0 +1,6 @@ +sampleName class sampleType injectionOrder batch +VT_160120_002 sample sample 1 1 +VT_160120_004 sample sample 2 1 +VT_160120_006 sample sample 3 1 +VT_160120_008 sample sample 4 1 +VT_160120_010 sample sample 5 1
--- a/waveica.xml Thu May 30 14:54:02 2024 +0000 +++ b/waveica.xml Thu Jun 06 12:25:05 2024 +0000 @@ -1,4 +1,4 @@ -<tool id="waveica" name="WaveICA" version="@TOOL_VERSION@+galaxy6" profile="21.09"> +<tool id="waveica" name="WaveICA" version="@TOOL_VERSION@+galaxy7" profile="21.09"> <description>removal of batch effects for untargeted metabolomics data</description> <macros> <import>macros.xml</import> @@ -51,14 +51,14 @@ )' #end if - -e 'store_data(normalized_data, "$normalized_data", "$input_num.data.ext")' + -e 'store_data(normalized_data, "$normalized_data", "$metadata", "$input_num.data.ext", $keep_two_output)' ]]></command> <inputs> <conditional name="input_num"> <param name="input_choice" type="select" label="Choose input files:"> - <option value="1" selected="true">1</option> - <option value="2">2</option> + <option value="1" selected="true">1: intensity-by-feature table with metadata</option> + <option value="2">2: intensity-by-feature table and metadata table separately</option> </param> <when value="1"> <expand macro="input_data"/> @@ -89,12 +89,13 @@ </when> </conditional> <expand macro="exclude_blanks"/> + <expand macro="split_output"/> </inputs> <expand macro="outputs"/> <tests> - <test><!-- TEST 1 --> + <test expect_num_outputs="1"><!-- TEST 1 --> <param name="data" value="input_data.csv" ftype="csv"/> <param name="mode" value="batchwise"/> <param name="wavelet_filter" value="d"/> @@ -103,9 +104,9 @@ <param name="t" value="0.05"/> <param name="t2" value="0.05"/> <param name="alpha" value="0"/> - <output name="normalized_data" file="normalized_data.tsv" ftype="tsv"/> + <output name="normalized_data" file="normalized_data.tsv" ftype="tabular"/> </test> - <test><!-- TEST 2 --> + <test expect_num_outputs="1"><!-- TEST 2 --> <param name="data" value="input_data.tsv" ftype="tsv"/> <param name="mode" value="batchwise"/> <param name="wavelet_filter" value="d"/> @@ -114,9 +115,9 @@ <param name="t" value="0.05"/> <param name="t2" value="0.05"/> <param name="alpha" value="0"/> - <output name="normalized_data" file="normalized_data.tsv" ftype="tsv"/> + <output name="normalized_data" file="normalized_data.tsv" ftype="tabular"/> </test> - <test><!-- TEST 3 --> + <test expect_num_outputs="1"><!-- TEST 3 --> <param name="data" value="input_data.parquet" ftype="parquet"/> <param name="mode" value="batchwise"/> <param name="wavelet_filter" value="d"/> @@ -127,7 +128,7 @@ <param name="alpha" value="0"/> <output name="normalized_data" file="normalized_data.parquet" ftype="parquet"/> </test> - <test><!-- TEST 4 --> + <test expect_num_outputs="1"><!-- TEST 4 --> <param name="input_choice" value="2"/> <param name="data" value="feature_table.csv" ftype="csv"/> <param name="metadata" value="metadata.csv" ftype="csv"/> @@ -138,9 +139,9 @@ <param name="t" value="0.05"/> <param name="t2" value="0.05"/> <param name="alpha" value="0"/> - <output name="normalized_data" file="normalized_data.tsv" ftype="tsv"/> + <output name="normalized_data" file="normalized_data.tsv" ftype="tabular"/> </test> - <test><!-- TEST 5 --> + <test expect_num_outputs="1"><!-- TEST 5 --> <param name="input_choice" value="2"/> <param name="data" value="feature_table.tsv" ftype="tsv"/> <param name="metadata" value="metadata.tsv" ftype="tsv"/> @@ -151,9 +152,9 @@ <param name="t" value="0.05"/> <param name="t2" value="0.05"/> <param name="alpha" value="0"/> - <output name="normalized_data" file="normalized_data.tsv" ftype="tsv"/> + <output name="normalized_data" file="normalized_data.tsv" ftype="tabular"/> </test> - <test><!-- TEST 6 --> + <test expect_num_outputs="1"><!-- TEST 6 --> <param name="input_choice" value="2"/> <param name="data" value="feature_table.parquet" ftype="parquet"/> <param name="metadata" value="metadata.csv" ftype="csv"/> @@ -166,7 +167,7 @@ <param name="alpha" value="0"/> <output name="normalized_data" file="normalized_data.parquet" compare="sim_size" delta="200" ftype="parquet"/> </test> - <test><!-- TEST 7 --> + <test expect_num_outputs="1"><!-- TEST 7 --> <param name="input_choice" value="2"/> <param name="data" value="feature_table_transpose_version.parquet" ftype="parquet"/> <param name="metadata" value="metadata.parquet" ftype="parquet"/> @@ -180,7 +181,7 @@ <param name="alpha" value="0"/> <output name="normalized_data" file="normalized_data.parquet" compare="sim_size" delta="200" ftype="parquet"/> </test> - <test><!-- TEST 8 --> + <test expect_num_outputs="1"><!-- TEST 8 --> <param name="input_choice" value="2"/> <param name="data" value="feature_table_transpose_version.csv" ftype="csv"/> <param name="metadata" value="metadata.csv" ftype="csv"/> @@ -192,7 +193,7 @@ <param name="t" value="0.05"/> <param name="t2" value="0.05"/> <param name="alpha" value="0"/> - <output name="normalized_data" file="normalized_data.tsv" ftype="tsv"/> + <output name="normalized_data" file="normalized_data.tsv" ftype="tabular"/> </test> <!-- The following test has different results on three platform I've tried --> <!-- <test> @@ -205,10 +206,42 @@ <param name="cutoff" value="0"/> <output name="normalized_data" file="normalized_data_nobatch.tsv"/> </test> --> - <test expect_failure="true"> + <test expect_num_outputs="2"><!-- TEST 9 --> + <param name="input_choice" value="2"/> + <param name="data" value="feature_table_transpose_version.parquet" ftype="parquet"/> + <param name="metadata" value="metadata.parquet" ftype="parquet"/> + <param name="transpose_feature_table" value="TRUE"/> + <param name="mode" value="batchwise"/> + <param name="wavelet_filter" value="d"/> + <param name="wavelet_length" value="2"/> + <param name="k" value="20"/> + <param name="t" value="0.05"/> + <param name="t2" value="0.05"/> + <param name="alpha" value="0"/> + <param name="keep_two_output" value="TRUE"/> + <output name="normalized_data" file="test9_output1.parquet" ftype="parquet"/> + <output name="metadata" file="test9_output2.parquet" ftype="parquet"/> + </test> + <test expect_num_outputs="2"><!-- TEST 10 --> + <param name="input_choice" value="2"/> + <param name="data" value="feature_table_transpose_version.csv" ftype="csv"/> + <param name="metadata" value="metadata.csv" ftype="csv"/> + <param name="transpose_feature_table" value="TRUE"/> + <param name="mode" value="batchwise"/> + <param name="wavelet_filter" value="d"/> + <param name="wavelet_length" value="2"/> + <param name="k" value="20"/> + <param name="t" value="0.05"/> + <param name="t2" value="0.05"/> + <param name="alpha" value="0"/> + <param name="keep_two_output" value="TRUE"/> + <output name="normalized_data" file="test10_output1.tsv" ftype="tabular"/> + <output name="metadata" file="test10_output2.tsv" ftype="tabular"/> + </test> + <test expect_failure="true"><!-- TEST 11 --> <param name="data" value="na_data.csv" ftype="csv"/> </test> - <test expect_failure="true"> + <test expect_failure="true"><!-- TEST 12 --> <param name="data" value="incomplete_metadata_data.csv" ftype="csv"/> </test> </tests>
--- a/waveica_wrapper.R Thu May 30 14:54:02 2024 +0000 +++ b/waveica_wrapper.R Thu Jun 06 12:25:05 2024 +0000 @@ -3,10 +3,7 @@ if (transpose) { col_names <- c("sampleName", data[[1]]) - t_data <- data[-1] - t_data <- t(t_data) - data <- data.frame(rownames(t_data), t_data) - colnames(data) <- col_names + data <- tranpose_data(data, col_names) } if (!is.na(metadata)) { @@ -133,7 +130,6 @@ return(data) } - sort_by_injection_order <- function(data) { if ("batch" %in% colnames(data)) { data <- data[order(data[, "batch"], data[, "injectionOrder"], decreasing = FALSE), ] @@ -143,7 +139,6 @@ return(data) } - verify_input_dataframe <- function(data, required_columns) { if (anyNA(data)) { stop("Error: dataframe cannot contain NULL values! @@ -194,7 +189,6 @@ return(data) } - # Match group labels with [blank/sample/qc] and enumerate them enumerate_groups <- function(group) { group[grepl("blank", tolower(group))] <- 0 @@ -204,7 +198,6 @@ return(group) } - # Create appropriate input for R wavelets function get_wf <- function(wavelet_filter, wavelet_length) { wf <- paste(wavelet_filter, wavelet_length, sep = "") @@ -217,7 +210,6 @@ return(wf) } - # Exclude blanks from a dataframe exclude_group <- function(data, group) { row_idx_to_exclude <- which(group %in% 0) @@ -230,14 +222,62 @@ } } -store_data <- function(data, output, ext) { +store_data <- function(data, feature_output, metadata_output, ext, split_output = FALSE) { if (ext == "parquet") { - arrow::write_parquet(data, output) + if (split_output == TRUE) { + split_df <- split_output(data) + arrow::write_parquet(split_df$metadata, metadata_output) + arrow::write_parquet(split_df$feature_table, feature_output) + } else { + arrow::write_parquet(data, feature_output) + } } else { - write.table(data, - file = output, sep = "\t", - row.names = FALSE, quote = FALSE - ) + if (split_output == TRUE) { + split_df <- split_output(data) + write.table(split_df$metadata, + file = metadata_output, sep = "\t", + row.names = FALSE, quote = FALSE + ) + write.table(split_df$feature_table, + file = feature_output, sep = "\t", + row.names = FALSE, quote = FALSE + ) + } else { + write.table(data, + file = feature_output, sep = "\t", + row.names = FALSE, quote = FALSE + ) + } } cat("Normalization has been completed.\n") } + +split_output <- function(df) { + required_columns_set1 <- c("sampleName", "class", "sampleType", "injectionOrder", "batch") + required_columns_set2 <- c("sampleName", "class", "sampleType", "injectionOrder") + + if (all(required_columns_set1 %in% colnames(df))) { + metadata_df <- df[, required_columns_set1, drop = FALSE] + df <- df[, -c(2:5)] + } else if (all(required_columns_set2 %in% colnames(df))) { + metadata_df <- df[, required_columns_set2, drop = FALSE] + df <- df[, -c(2:4)] + } else { + stop("Neither set of required columns is present in the dataframe.") + } + + # Transpose the feature table + col_names <- c("id", as.vector(df[[1]])) + feature_table <- tranpose_data(df, col_names) + + return(list(metadata = metadata_df, feature_table = feature_table)) +} + +tranpose_data <- function(data, column_names) { + t_data <- data[-1] + t_data <- t(t_data) + tranposed_data <- data.frame(rownames(t_data), t_data) + colnames(tranposed_data) <- column_names + + return(tranposed_data) +}