Mercurial > repos > recetox > waveica
changeset 2:6480c6d5fa36 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 77bea6c84c5a932fa203723d4d5fea6147f06505
author | recetox |
---|---|
date | Fri, 23 Jun 2023 07:27:29 +0000 |
parents | b77023c41c76 |
children | dbbedb14b44c |
files | macros.xml test-data/feature_table_transpose_version.csv test-data/feature_table_transpose_version.parquet waveica.xml waveica_wrapper.R |
diffstat | 5 files changed, 122 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Thu Sep 29 15:21:04 2022 +0000 +++ b/macros.xml Fri Jun 23 07:27:29 2023 +0000 @@ -20,6 +20,12 @@ </creator> </xml> + <xml name="annotation"> + <xrefs> + <xref type="bio.tools">waveica</xref> + </xrefs> + </xml> + <xml name="input_data"> <param type="data" name="data" label="Feature table" format="csv,tsv,parquet" help=""/> </xml>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/feature_table_transpose_version.csv Fri Jun 23 07:27:29 2023 +0000 @@ -0,0 +1,5 @@ +id,VT_160120_002,VT_160120_004,VT_160120_006,VT_160120_008,VT_160120_010 +M85T34,228520.06430737,90217.384387202,235656.75288383896,16622.9351783435,62385.0742465736 +M86T41,35646729.21543971,35735702.457215995,37021134.452711605,44302499.262606,44639738.0735709 +M86T518,2386896.97966461,2456290.69621518,8873450.40260241,2466946.89667101,2389372.85729467 +M86T539,1026645.83653468,1089246.46040563,837856.449608585,994979.069689685,954938.131337246
--- a/waveica.xml Thu Sep 29 15:21:04 2022 +0000 +++ b/waveica.xml Fri Jun 23 07:27:29 2023 +0000 @@ -1,8 +1,9 @@ -<tool id="waveica" name="WaveICA" version="@TOOL_VERSION@+galaxy2" python_template_version="3.5"> +<tool id="waveica" name="WaveICA" version="@TOOL_VERSION@+galaxy3" python_template_version="3.5"> <description>removal of batch effects for untargeted metabolomics data</description> <macros> <import>macros.xml</import> </macros> + <expand macro="annotation"/> <expand macro="creator"/> <requirements> @@ -19,6 +20,7 @@ #if $input_num.input_choice == "2": metadata = "$input_num.input_metadata.metadata", ext = "$input_num.data.ext,$input_num.input_metadata.metadata.ext", + transpose = $input_num.transpose_feature_table, #else: ext = "$input_num.data.ext", #end if @@ -36,6 +38,7 @@ #if $input_num.input_choice == "2": metadata = "$input_num.input_metadata.metadata", ext = "$input_num.data.ext,$input_num.input_metadata.metadata.ext", + transpose = $input_num.transpose_feature_table, #else: ext = "$input_num.data.ext", #end if @@ -61,10 +64,12 @@ <expand macro="input_data"/> </when> <when value="2"> + <expand macro="input_data"/> <section name="input_metadata" title="Input metadata table" expanded="true"> <param name="metadata" label="Input metadata" type="data" format="csv,tsv,parquet" help="" /> - </section> - <expand macro="input_data"/> + </section> + <param name = "transpose_feature_table" label="Transpose feature table" type="boolean" checked="false" + truevalue="TRUE" falsevalue="FALSE" help="Swap sample names with feature names as column headers (to fit recetox-aplcms outputs)." /> </when> </conditional> <expand macro="general_parameters"/> @@ -93,7 +98,7 @@ <param name="data" value="input_data.csv" ftype="csv"/> <param name="mode" value="batchwise"/> <param name="wavelet_filter" value="d"/> - <param name="filter_length" value="2"/> + <param name="wavelet_length" value="2"/> <param name="k" value="20"/> <param name="t" value="0.05"/> <param name="t2" value="0.05"/> @@ -104,7 +109,7 @@ <param name="data" value="input_data.tsv" ftype="tsv"/> <param name="mode" value="batchwise"/> <param name="wavelet_filter" value="d"/> - <param name="filter_length" value="2"/> + <param name="wavelet_length" value="2"/> <param name="k" value="20"/> <param name="t" value="0.05"/> <param name="t2" value="0.05"/> @@ -115,7 +120,7 @@ <param name="data" value="input_data.parquet" ftype="parquet"/> <param name="mode" value="batchwise"/> <param name="wavelet_filter" value="d"/> - <param name="filter_length" value="2"/> + <param name="wavelet_length" value="2"/> <param name="k" value="20"/> <param name="t" value="0.05"/> <param name="t2" value="0.05"/> @@ -128,7 +133,7 @@ <param name="metadata" value="metadata.csv" ftype="csv"/> <param name="mode" value="batchwise"/> <param name="wavelet_filter" value="d"/> - <param name="filter_length" value="2"/> + <param name="wavelet_length" value="2"/> <param name="k" value="20"/> <param name="t" value="0.05"/> <param name="t2" value="0.05"/> @@ -141,7 +146,7 @@ <param name="metadata" value="metadata.tsv" ftype="tsv"/> <param name="mode" value="batchwise"/> <param name="wavelet_filter" value="d"/> - <param name="filter_length" value="2"/> + <param name="wavelet_length" value="2"/> <param name="k" value="20"/> <param name="t" value="0.05"/> <param name="t2" value="0.05"/> @@ -154,13 +159,41 @@ <param name="metadata" value="metadata.csv" ftype="csv"/> <param name="mode" value="batchwise"/> <param name="wavelet_filter" value="d"/> - <param name="filter_length" value="2"/> + <param name="wavelet_length" value="2"/> <param name="k" value="20"/> <param name="t" value="0.05"/> <param name="t2" value="0.05"/> <param name="alpha" value="0"/> <output name="normalized_data" file="normalized_data.parquet" compare="sim_size" delta="200"/> </test> + <test><!-- TEST 7 --> + <param name="input_choice" value="2"/> + <param name="data" value="feature_table_transpose_version.parquet" ftype="parquet"/> + <param name="metadata" value="metadata.parquet" ftype="parquet"/> + <param name="transpose_feature_table" value="TRUE"/> + <param name="mode" value="batchwise"/> + <param name="wavelet_filter" value="d"/> + <param name="wavelet_length" value="2"/> + <param name="k" value="20"/> + <param name="t" value="0.05"/> + <param name="t2" value="0.05"/> + <param name="alpha" value="0"/> + <output name="normalized_data" file="normalized_data.parquet" compare="sim_size" delta="200"/> + </test> + <test><!-- TEST 8 --> + <param name="input_choice" value="2"/> + <param name="data" value="feature_table_transpose_version.csv" ftype="csv"/> + <param name="metadata" value="metadata.csv" ftype="csv"/> + <param name="transpose_feature_table" value="TRUE"/> + <param name="mode" value="batchwise"/> + <param name="wavelet_filter" value="d"/> + <param name="wavelet_length" value="2"/> + <param name="k" value="20"/> + <param name="t" value="0.05"/> + <param name="t2" value="0.05"/> + <param name="alpha" value="0"/> + <output name="normalized_data" file="normalized_data.csv"/> + </test> <!-- The following test has different results on three platform I've tried --> <!-- <test> <param name="data" value="input_data_nobatch.csv" ftype="csv"/>
--- a/waveica_wrapper.R Thu Sep 29 15:21:04 2022 +0000 +++ b/waveica_wrapper.R Fri Jun 23 07:27:29 2023 +0000 @@ -1,6 +1,14 @@ -read_file <- function(file, metadata, ft_ext, mt_ext) { +read_file <- function(file, metadata, ft_ext, mt_ext, transpose) { data <- read_data(file, ft_ext) + if (transpose) { + col_names <- c("sampleName", data[[1]]) + t_data <- data[-1] + t_data <- t(t_data) + data <- data.frame(rownames(t_data), t_data) + colnames(data) <- col_names + } + if (!is.na(metadata)) { mt_data <- read_data(metadata, mt_ext) data <- merge(mt_data, data, by = "sampleName") @@ -24,6 +32,7 @@ waveica <- function(file, metadata = NA, ext, + transpose = FALSE, wavelet_filter, wavelet_length, k, @@ -31,17 +40,19 @@ t2, alpha, exclude_blanks) { - # get input from the Galaxy, preprocess data ext <- strsplit(x = ext, split = "\\,")[[1]] ft_ext <- ext[1] mt_ext <- ext[2] - data <- read_file(file, metadata, ft_ext, mt_ext) + data <- read_file(file, metadata, ft_ext, mt_ext, transpose) - required_columns <- c("sampleName", "class", "sampleType", "injectionOrder", "batch") - verify_input_dataframe(data, required_columns) + required_columns <- c( + "sampleName", "class", "sampleType", + "injectionOrder", "batch" + ) + data <- verify_input_dataframe(data, required_columns) data <- sort_by_injection_order(data) @@ -76,28 +87,30 @@ waveica_singlebatch <- function(file, metadata = NA, ext, + transpose = FALSE, wavelet_filter, wavelet_length, k, alpha, cutoff, exclude_blanks) { - # get input from the Galaxy, preprocess data ext <- strsplit(x = ext, split = "\\,")[[1]] ft_ext <- ext[1] mt_ext <- ext[2] - data <- read_file(file, metadata, ft_ext, mt_ext) + data <- read_file(file, metadata, ft_ext, mt_ext, transpose) required_columns <- c("sampleName", "class", "sampleType", "injectionOrder") optional_columns <- c("batch") - verify_input_dataframe(data, required_columns) + + data <- verify_input_dataframe(data, required_columns) data <- sort_by_injection_order(data) - feature_columns <- colnames(data)[!colnames(data) %in% c(required_columns, optional_columns)] + feature_columns <- colnames(data)[!colnames(data) %in% + c(required_columns, optional_columns)] features <- data[, feature_columns] injection_order <- data$injectionOrder @@ -112,7 +125,7 @@ ) data[, feature_columns] <- features - + group <- enumerate_groups(as.character(data$sampleType)) # remove blanks from dataset if (exclude_blanks) { data <- exclude_group(data, group) @@ -142,9 +155,49 @@ stop("Error: dataframe cannot contain NULL values! Make sure that your dataframe does not contain empty cells") } else if (!all(required_columns %in% colnames(data))) { - stop("Error: missing metadata! -Make sure that the following columns are present in your dataframe: ", paste(required_columns, collapse = ", ")) + stop( + "Error: missing metadata! +Make sure that the following columns are present in your dataframe: ", + paste(required_columns, collapse = ", ") + ) } + + data <- verify_column_types(data, required_columns) + + return(data) +} + +verify_column_types <- function(data, required_columns) { + # Specify the column names and their expected types + column_types <- list( + "sampleName" = c("character", "factor"), + "class" = c("character", "factor"), + "sampleType" = c("character", "factor"), + "injectionOrder" = "integer", + "batch" = "integer" + ) + + column_types <- column_types[required_columns] + + for (col_name in names(data)) { + actual_type <- class(data[[col_name]]) + if (col_name %in% names(column_types)) { + expected_types <- column_types[[col_name]] + + if (!actual_type %in% expected_types) { + stop( + "Column ", col_name, " is of type ", actual_type, + " but expected type is ", + paste(expected_types, collapse = " or "), "\n" + ) + } + } else { + if (actual_type != "numeric") { + data[[col_name]] <- as.numeric(as.character(data[[col_name]])) + } + } + } + return(data) } @@ -187,7 +240,10 @@ if (ext == "csv") { write.csv(data, file = output, row.names = FALSE, quote = FALSE) } else if (ext == "tsv") { - write.table(data, file = output, sep = "\t", row.names = FALSE, quote = FALSE) + write.table(data, + file = output, sep = "\t", + row.names = FALSE, quote = FALSE + ) } else { arrow::write_parquet(data, sink = output) }