Mercurial > repos > recetox > waveica
changeset 1:b77023c41c76 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit d82e7dad96bebe9424ac7bf490e2786d82c3681a
author | recetox |
---|---|
date | Thu, 29 Sep 2022 15:21:04 +0000 |
parents | 328710890963 |
children | 6480c6d5fa36 |
files | macros.xml test-data/feature_table.csv test-data/feature_table.parquet test-data/feature_table.tsv test-data/input_data.parquet test-data/input_data.tsv test-data/metadata.csv test-data/metadata.parquet test-data/metadata.tsv test-data/normalized_data.csv test-data/normalized_data.parquet waveica.xml waveica_wrapper.R |
diffstat | 13 files changed, 189 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Wed Mar 23 11:35:30 2022 +0000 +++ b/macros.xml Thu Sep 29 15:21:04 2022 +0000 @@ -8,6 +8,11 @@ familyName="Skoryk" url="https://github.com/maximskorik" identifier="0000-0003-2056-8018" /> + <person + givenName="Zargham" + familyName="Ahmad" + url="https://github.com/zargham-ahmad" + identifier="0000-0002-6096-224X" /> <organization url="https://www.recetox.muni.cz/" email="GalaxyToolsDevelopmentandDeployment@space.muni.cz" @@ -15,8 +20,10 @@ </creator> </xml> + <xml name="input_data"> + <param type="data" name="data" label="Feature table" format="csv,tsv,parquet" help=""/> + </xml> <xml name="general_parameters"> - <param type="data" name="data" label="Feature table" format="csv" help=""/> <param type="integer" value="20" name="k" label="Number of components to decompose" help="maximal component that ICA decomposes"/> <param type="float" value="0" name="alpha" label="Alpha" help="trade-off value between the independence of samples (temporal ICA) and variables (spatial ICA), should be between 0 and 1"/> </xml> @@ -86,7 +93,7 @@ <xml name="outputs"> <outputs> - <data name="normalized_data" format="tsv" /> + <data format_source="data" name="normalized_data"/> </outputs> </xml>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/feature_table.csv Thu Sep 29 15:21:04 2022 +0000 @@ -0,0 +1,6 @@ +sampleName,M85T34,M86T41,M86T518,M86T539 +VT_160120_002,228520.06430737,35646729.21543971,2386896.97966461,1026645.83653468 +VT_160120_004,90217.384387202,35735702.457215995,2456290.69621518,1089246.46040563 +VT_160120_006,235656.75288383896,37021134.452711605,8873450.40260241,837856.449608585 +VT_160120_008,16622.9351783435,44302499.262606,2466946.89667101,994979.069689685 +VT_160120_010,62385.0742465736,44639738.0735709,2389372.85729467,954938.131337246
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/feature_table.tsv Thu Sep 29 15:21:04 2022 +0000 @@ -0,0 +1,6 @@ +sampleName M85T34 M86T41 M86T518 M86T539 +VT_160120_002 228520.06430737 35646729.21543971 2386896.97966461 1026645.83653468 +VT_160120_004 90217.384387202 35735702.457215995 2456290.69621518 1089246.46040563 +VT_160120_006 235656.75288383896 37021134.452711605 8873450.40260241 837856.449608585 +VT_160120_008 16622.9351783435 44302499.262606 2466946.89667101 994979.069689685 +VT_160120_010 62385.0742465736 44639738.0735709 2389372.85729467 954938.131337246
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input_data.tsv Thu Sep 29 15:21:04 2022 +0000 @@ -0,0 +1,6 @@ +sampleName class sampleType injectionOrder batch M85T34 M86T41 M86T518 M86T539 +VT_160120_002 sample sample 1 1 228520.06430737 35646729.21543971 2386896.97966461 1026645.83653468 +VT_160120_004 sample sample 2 1 90217.384387202 35735702.457215995 2456290.69621518 1089246.46040563 +VT_160120_006 sample sample 3 1 235656.75288383896 37021134.452711605 8873450.40260241 837856.449608585 +VT_160120_008 sample sample 4 1 16622.9351783435 44302499.262606 2466946.89667101 994979.069689685 +VT_160120_010 sample sample 5 1 62385.0742465736 44639738.0735709 2389372.85729467 954938.131337246
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/metadata.csv Thu Sep 29 15:21:04 2022 +0000 @@ -0,0 +1,6 @@ +sampleName,class,sampleType,injectionOrder,batch +VT_160120_002,sample,sample,1,1 +VT_160120_004,sample,sample,2,1 +VT_160120_006,sample,sample,3,1 +VT_160120_008,sample,sample,4,1 +VT_160120_010,sample,sample,5,1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/metadata.tsv Thu Sep 29 15:21:04 2022 +0000 @@ -0,0 +1,6 @@ +sampleName class sampleType injectionOrder batch +VT_160120_002 sample sample 1 1 +VT_160120_004 sample sample 2 1 +VT_160120_006 sample sample 3 1 +VT_160120_008 sample sample 4 1 +VT_160120_010 sample sample 5 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/normalized_data.csv Thu Sep 29 15:21:04 2022 +0000 @@ -0,0 +1,6 @@ +sampleName,class,sampleType,injectionOrder,batch,M85T34,M86T41,M86T518,M86T539 +VT_160120_002,sample,sample,1,1,355200.506508035,75115889.9077485,6101488.54615418,2007379.02604984 +VT_160120_004,sample,sample,2,1,216897.826587868,75204863.1495248,6170882.26270475,2069979.64992079 +VT_160120_006,sample,sample,3,1,362337.195084504,76490295.1450204,12588041.969092,1818589.63912375 +VT_160120_008,sample,sample,4,1,143303.377379009,83771659.9549148,6181538.46316058,1975712.25920485 +VT_160120_010,sample,sample,5,1,189065.516447239,84108898.7658797,6103964.42378424,1935671.32085241
--- a/waveica.xml Wed Mar 23 11:35:30 2022 +0000 +++ b/waveica.xml Thu Sep 29 15:21:04 2022 +0000 @@ -1,4 +1,4 @@ -<tool id="waveica" name="WaveICA" version="@TOOL_VERSION@+galaxy1" python_template_version="3.5"> +<tool id="waveica" name="WaveICA" version="@TOOL_VERSION@+galaxy2" python_template_version="3.5"> <description>removal of batch effects for untargeted metabolomics data</description> <macros> <import>macros.xml</import> @@ -7,6 +7,7 @@ <requirements> <requirement type="package" version="@TOOL_VERSION@">r-recetox-waveica</requirement> + <requirement type="package" version="8.0.0">r-arrow</requirement> </requirements> <command detect_errors="aggressive"><![CDATA[ Rscript @@ -14,7 +15,13 @@ #if $batch_correction.mode == "batchwise": -e 'normalized_data <- waveica( - data = "$data", + file = "$input_num.data", + #if $input_num.input_choice == "2": + metadata = "$input_num.input_metadata.metadata", + ext = "$input_num.data.ext,$input_num.input_metadata.metadata.ext", + #else: + ext = "$input_num.data.ext", + #end if wavelet_filter = "$wf.wavelet_filter", wavelet_length = "$wf.wavelet_length", k = $k, @@ -24,8 +31,14 @@ exclude_blanks = $exclude_blanks )' #else if $batch_correction.mode == "single_batch": - -e 'normalized_data <- waveica_singlebatch( - data = "$data", + -e 'normalized_data <- waveica_singlebatch( + file = "$input_num.data", + #if $input_num.input_choice == "2": + metadata = "$input_num.input_metadata.metadata", + ext = "$input_num.data.ext,$input_num.input_metadata.metadata.ext", + #else: + ext = "$input_num.data.ext", + #end if wavelet_filter = "$wf.wavelet_filter", wavelet_length = "$wf.wavelet_length", k = $k, @@ -35,10 +48,25 @@ )' #end if - -e 'store_data(normalized_data,"$normalized_data")' + -e 'store_data(normalized_data, "$normalized_data", "$input_num.data.ext")' ]]></command> <inputs> + <conditional name="input_num"> + <param name="input_choice" type="select" label="Choose input files:"> + <option value="1" selected="true">1</option> + <option value="2">2</option> + </param> + <when value="1"> + <expand macro="input_data"/> + </when> + <when value="2"> + <section name="input_metadata" title="Input metadata table" expanded="true"> + <param name="metadata" label="Input metadata" type="data" format="csv,tsv,parquet" help="" /> + </section> + <expand macro="input_data"/> + </when> + </conditional> <expand macro="general_parameters"/> <expand macro="wf"/> <conditional name="batch_correction"> @@ -61,7 +89,7 @@ <expand macro="outputs"/> <tests> - <test> + <test><!-- TEST 1 --> <param name="data" value="input_data.csv" ftype="csv"/> <param name="mode" value="batchwise"/> <param name="wavelet_filter" value="d"/> @@ -70,8 +98,69 @@ <param name="t" value="0.05"/> <param name="t2" value="0.05"/> <param name="alpha" value="0"/> + <output name="normalized_data" file="normalized_data.csv"/> + </test> + <test><!-- TEST 2 --> + <param name="data" value="input_data.tsv" ftype="tsv"/> + <param name="mode" value="batchwise"/> + <param name="wavelet_filter" value="d"/> + <param name="filter_length" value="2"/> + <param name="k" value="20"/> + <param name="t" value="0.05"/> + <param name="t2" value="0.05"/> + <param name="alpha" value="0"/> <output name="normalized_data" file="normalized_data.tsv"/> </test> + <test><!-- TEST 3 --> + <param name="data" value="input_data.parquet" ftype="parquet"/> + <param name="mode" value="batchwise"/> + <param name="wavelet_filter" value="d"/> + <param name="filter_length" value="2"/> + <param name="k" value="20"/> + <param name="t" value="0.05"/> + <param name="t2" value="0.05"/> + <param name="alpha" value="0"/> + <output name="normalized_data" file="normalized_data.parquet"/> + </test> + <test><!-- TEST 4 --> + <param name="input_choice" value="2"/> + <param name="data" value="feature_table.csv" ftype="csv"/> + <param name="metadata" value="metadata.csv" ftype="csv"/> + <param name="mode" value="batchwise"/> + <param name="wavelet_filter" value="d"/> + <param name="filter_length" value="2"/> + <param name="k" value="20"/> + <param name="t" value="0.05"/> + <param name="t2" value="0.05"/> + <param name="alpha" value="0"/> + <output name="normalized_data" file="normalized_data.csv"/> + </test> + <test><!-- TEST 5 --> + <param name="input_choice" value="2"/> + <param name="data" value="feature_table.tsv" ftype="tsv"/> + <param name="metadata" value="metadata.tsv" ftype="tsv"/> + <param name="mode" value="batchwise"/> + <param name="wavelet_filter" value="d"/> + <param name="filter_length" value="2"/> + <param name="k" value="20"/> + <param name="t" value="0.05"/> + <param name="t2" value="0.05"/> + <param name="alpha" value="0"/> + <output name="normalized_data" file="normalized_data.tsv"/> + </test> + <test><!-- TEST 6 --> + <param name="input_choice" value="2"/> + <param name="data" value="feature_table.parquet" ftype="parquet"/> + <param name="metadata" value="metadata.csv" ftype="csv"/> + <param name="mode" value="batchwise"/> + <param name="wavelet_filter" value="d"/> + <param name="filter_length" value="2"/> + <param name="k" value="20"/> + <param name="t" value="0.05"/> + <param name="t2" value="0.05"/> + <param name="alpha" value="0"/> + <output name="normalized_data" file="normalized_data.parquet" compare="sim_size" delta="200"/> + </test> <!-- The following test has different results on three platform I've tried --> <!-- <test> <param name="data" value="input_data_nobatch.csv" ftype="csv"/>
--- a/waveica_wrapper.R Wed Mar 23 11:35:30 2022 +0000 +++ b/waveica_wrapper.R Thu Sep 29 15:21:04 2022 +0000 @@ -1,4 +1,29 @@ -waveica <- function(data, +read_file <- function(file, metadata, ft_ext, mt_ext) { + data <- read_data(file, ft_ext) + + if (!is.na(metadata)) { + mt_data <- read_data(metadata, mt_ext) + data <- merge(mt_data, data, by = "sampleName") + } + + return(data) +} + +read_data <- function(file, ext) { + if (ext == "csv") { + data <- read.csv(file, header = TRUE) + } else if (ext == "tsv") { + data <- read.csv(file, header = TRUE, sep = "\t") + } else { + data <- arrow::read_parquet(file) + } + + return(data) +} + +waveica <- function(file, + metadata = NA, + ext, wavelet_filter, wavelet_length, k, @@ -8,7 +33,12 @@ exclude_blanks) { # get input from the Galaxy, preprocess data - data <- read.csv(data, header = TRUE) + ext <- strsplit(x = ext, split = "\\,")[[1]] + + ft_ext <- ext[1] + mt_ext <- ext[2] + + data <- read_file(file, metadata, ft_ext, mt_ext) required_columns <- c("sampleName", "class", "sampleType", "injectionOrder", "batch") verify_input_dataframe(data, required_columns) @@ -43,8 +73,9 @@ return(data) } - -waveica_singlebatch <- function(data, +waveica_singlebatch <- function(file, + metadata = NA, + ext, wavelet_filter, wavelet_length, k, @@ -53,7 +84,12 @@ exclude_blanks) { # get input from the Galaxy, preprocess data - data <- read.csv(data, header = TRUE) + ext <- strsplit(x = ext, split = "\\,")[[1]] + + ft_ext <- ext[1] + mt_ext <- ext[2] + + data <- read_file(file, metadata, ft_ext, mt_ext) required_columns <- c("sampleName", "class", "sampleType", "injectionOrder") optional_columns <- c("batch") @@ -147,9 +183,13 @@ } } - -# Store output of WaveICA in a tsv file -store_data <- function(data, output) { - write.table(data, file = output, sep = "\t", row.names = FALSE, quote = FALSE) +store_data <- function(data, output, ext) { + if (ext == "csv") { + write.csv(data, file = output, row.names = FALSE, quote = FALSE) + } else if (ext == "tsv") { + write.table(data, file = output, sep = "\t", row.names = FALSE, quote = FALSE) + } else { + arrow::write_parquet(data, sink = output) + } cat("Normalization has been completed.\n") }