changeset 1:b77023c41c76 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit d82e7dad96bebe9424ac7bf490e2786d82c3681a
author recetox
date Thu, 29 Sep 2022 15:21:04 +0000
parents 328710890963
children 6480c6d5fa36
files macros.xml test-data/feature_table.csv test-data/feature_table.parquet test-data/feature_table.tsv test-data/input_data.parquet test-data/input_data.tsv test-data/metadata.csv test-data/metadata.parquet test-data/metadata.tsv test-data/normalized_data.csv test-data/normalized_data.parquet waveica.xml waveica_wrapper.R
diffstat 13 files changed, 189 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Wed Mar 23 11:35:30 2022 +0000
+++ b/macros.xml	Thu Sep 29 15:21:04 2022 +0000
@@ -8,6 +8,11 @@
                 familyName="Skoryk"
                 url="https://github.com/maximskorik"
                 identifier="0000-0003-2056-8018" />
+            <person
+                givenName="Zargham"
+                familyName="Ahmad"
+                url="https://github.com/zargham-ahmad"
+                identifier="0000-0002-6096-224X" />
             <organization
                 url="https://www.recetox.muni.cz/"
                 email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
@@ -15,8 +20,10 @@
         </creator>
     </xml>
 
+    <xml name="input_data">
+        <param type="data" name="data" label="Feature table" format="csv,tsv,parquet" help=""/>
+    </xml>
     <xml name="general_parameters">
-        <param type="data" name="data" label="Feature table" format="csv" help=""/>
         <param type="integer" value="20" name="k" label="Number of components to decompose" help="maximal component that ICA decomposes"/>
         <param type="float" value="0" name="alpha" label="Alpha" help="trade-off value between the independence of samples (temporal ICA) and variables (spatial ICA), should be between 0 and 1"/>
     </xml>
@@ -86,7 +93,7 @@
 
     <xml name="outputs">
         <outputs>
-            <data name="normalized_data" format="tsv" />
+             <data format_source="data" name="normalized_data"/>
         </outputs>
     </xml>
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/feature_table.csv	Thu Sep 29 15:21:04 2022 +0000
@@ -0,0 +1,6 @@
+sampleName,M85T34,M86T41,M86T518,M86T539
+VT_160120_002,228520.06430737,35646729.21543971,2386896.97966461,1026645.83653468
+VT_160120_004,90217.384387202,35735702.457215995,2456290.69621518,1089246.46040563
+VT_160120_006,235656.75288383896,37021134.452711605,8873450.40260241,837856.449608585
+VT_160120_008,16622.9351783435,44302499.262606,2466946.89667101,994979.069689685
+VT_160120_010,62385.0742465736,44639738.0735709,2389372.85729467,954938.131337246
Binary file test-data/feature_table.parquet has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/feature_table.tsv	Thu Sep 29 15:21:04 2022 +0000
@@ -0,0 +1,6 @@
+sampleName	M85T34	M86T41	M86T518	M86T539
+VT_160120_002	228520.06430737	35646729.21543971	2386896.97966461	1026645.83653468
+VT_160120_004	90217.384387202	35735702.457215995	2456290.69621518	1089246.46040563
+VT_160120_006	235656.75288383896	37021134.452711605	8873450.40260241	837856.449608585
+VT_160120_008	16622.9351783435	44302499.262606	2466946.89667101	994979.069689685
+VT_160120_010	62385.0742465736	44639738.0735709	2389372.85729467	954938.131337246
Binary file test-data/input_data.parquet has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_data.tsv	Thu Sep 29 15:21:04 2022 +0000
@@ -0,0 +1,6 @@
+sampleName	class	sampleType	injectionOrder	batch	M85T34	M86T41	M86T518	M86T539
+VT_160120_002	sample	sample	1	1	228520.06430737	35646729.21543971	2386896.97966461	1026645.83653468
+VT_160120_004	sample	sample	2	1	90217.384387202	35735702.457215995	2456290.69621518	1089246.46040563
+VT_160120_006	sample	sample	3	1	235656.75288383896	37021134.452711605	8873450.40260241	837856.449608585
+VT_160120_008	sample	sample	4	1	16622.9351783435	44302499.262606	2466946.89667101	994979.069689685
+VT_160120_010	sample	sample	5	1	62385.0742465736	44639738.0735709	2389372.85729467	954938.131337246
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metadata.csv	Thu Sep 29 15:21:04 2022 +0000
@@ -0,0 +1,6 @@
+sampleName,class,sampleType,injectionOrder,batch
+VT_160120_002,sample,sample,1,1
+VT_160120_004,sample,sample,2,1
+VT_160120_006,sample,sample,3,1
+VT_160120_008,sample,sample,4,1
+VT_160120_010,sample,sample,5,1
Binary file test-data/metadata.parquet has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metadata.tsv	Thu Sep 29 15:21:04 2022 +0000
@@ -0,0 +1,6 @@
+sampleName	class	sampleType	injectionOrder	batch
+VT_160120_002	sample	sample	1	1
+VT_160120_004	sample	sample	2	1
+VT_160120_006	sample	sample	3	1
+VT_160120_008	sample	sample	4	1
+VT_160120_010	sample	sample	5	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/normalized_data.csv	Thu Sep 29 15:21:04 2022 +0000
@@ -0,0 +1,6 @@
+sampleName,class,sampleType,injectionOrder,batch,M85T34,M86T41,M86T518,M86T539
+VT_160120_002,sample,sample,1,1,355200.506508035,75115889.9077485,6101488.54615418,2007379.02604984
+VT_160120_004,sample,sample,2,1,216897.826587868,75204863.1495248,6170882.26270475,2069979.64992079
+VT_160120_006,sample,sample,3,1,362337.195084504,76490295.1450204,12588041.969092,1818589.63912375
+VT_160120_008,sample,sample,4,1,143303.377379009,83771659.9549148,6181538.46316058,1975712.25920485
+VT_160120_010,sample,sample,5,1,189065.516447239,84108898.7658797,6103964.42378424,1935671.32085241
Binary file test-data/normalized_data.parquet has changed
--- a/waveica.xml	Wed Mar 23 11:35:30 2022 +0000
+++ b/waveica.xml	Thu Sep 29 15:21:04 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="waveica" name="WaveICA" version="@TOOL_VERSION@+galaxy1" python_template_version="3.5">
+<tool id="waveica" name="WaveICA" version="@TOOL_VERSION@+galaxy2" python_template_version="3.5">
     <description>removal of batch effects for untargeted metabolomics data</description>
     <macros>
         <import>macros.xml</import>
@@ -7,6 +7,7 @@
 
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">r-recetox-waveica</requirement>
+        <requirement type="package" version="8.0.0">r-arrow</requirement>
     </requirements>
     <command detect_errors="aggressive"><![CDATA[
         Rscript
@@ -14,7 +15,13 @@
 
             #if $batch_correction.mode == "batchwise":
             -e 'normalized_data <- waveica(
-                data = "$data",
+                file = "$input_num.data",
+                #if $input_num.input_choice == "2":
+                    metadata = "$input_num.input_metadata.metadata",
+                    ext = "$input_num.data.ext,$input_num.input_metadata.metadata.ext",
+                #else:
+                    ext = "$input_num.data.ext",
+                #end if
                 wavelet_filter = "$wf.wavelet_filter",
                 wavelet_length = "$wf.wavelet_length",
                 k = $k,
@@ -24,8 +31,14 @@
                 exclude_blanks = $exclude_blanks
             )'
             #else if $batch_correction.mode == "single_batch":
-            -e 'normalized_data <- waveica_singlebatch(
-                data = "$data",
+            -e 'normalized_data <- waveica_singlebatch( 
+                file = "$input_num.data",
+                #if $input_num.input_choice == "2":
+                    metadata = "$input_num.input_metadata.metadata",
+                    ext = "$input_num.data.ext,$input_num.input_metadata.metadata.ext",
+                #else:
+                    ext = "$input_num.data.ext",
+                #end if
                 wavelet_filter = "$wf.wavelet_filter",
                 wavelet_length = "$wf.wavelet_length",
                 k = $k,
@@ -35,10 +48,25 @@
             )'
             #end if
 
-            -e 'store_data(normalized_data,"$normalized_data")'
+            -e 'store_data(normalized_data, "$normalized_data", "$input_num.data.ext")'
     ]]></command>
 
     <inputs>
+        <conditional name="input_num">
+            <param name="input_choice" type="select" label="Choose input files:">
+                    <option value="1" selected="true">1</option>
+                    <option value="2">2</option>
+            </param>
+            <when value="1">
+                <expand macro="input_data"/>
+            </when>
+            <when value="2">
+                <section name="input_metadata" title="Input metadata table" expanded="true">
+                    <param name="metadata" label="Input metadata" type="data" format="csv,tsv,parquet" help="" />
+                </section> 
+                <expand macro="input_data"/>
+            </when>
+        </conditional>
         <expand macro="general_parameters"/>
         <expand macro="wf"/>
         <conditional name="batch_correction">
@@ -61,7 +89,7 @@
     <expand macro="outputs"/>
 
     <tests>
-        <test>
+        <test><!-- TEST 1 -->
             <param name="data" value="input_data.csv" ftype="csv"/>
             <param name="mode" value="batchwise"/>
             <param name="wavelet_filter" value="d"/>
@@ -70,8 +98,69 @@
             <param name="t" value="0.05"/>
             <param name="t2" value="0.05"/>
             <param name="alpha" value="0"/>
+            <output name="normalized_data" file="normalized_data.csv"/>
+        </test>
+        <test><!-- TEST 2 -->
+            <param name="data" value="input_data.tsv" ftype="tsv"/>
+            <param name="mode" value="batchwise"/>
+            <param name="wavelet_filter" value="d"/>
+            <param name="filter_length" value="2"/>
+            <param name="k" value="20"/>
+            <param name="t" value="0.05"/>
+            <param name="t2" value="0.05"/>
+            <param name="alpha" value="0"/>
             <output name="normalized_data" file="normalized_data.tsv"/>
         </test>
+        <test><!-- TEST 3 -->
+            <param name="data" value="input_data.parquet" ftype="parquet"/>
+            <param name="mode" value="batchwise"/>
+            <param name="wavelet_filter" value="d"/>
+            <param name="filter_length" value="2"/>
+            <param name="k" value="20"/>
+            <param name="t" value="0.05"/>
+            <param name="t2" value="0.05"/>
+            <param name="alpha" value="0"/>
+            <output name="normalized_data" file="normalized_data.parquet"/>
+        </test>
+        <test><!-- TEST 4 -->
+            <param name="input_choice" value="2"/>
+            <param name="data" value="feature_table.csv" ftype="csv"/>
+            <param name="metadata" value="metadata.csv" ftype="csv"/>
+            <param name="mode" value="batchwise"/>
+            <param name="wavelet_filter" value="d"/>
+            <param name="filter_length" value="2"/>
+            <param name="k" value="20"/>
+            <param name="t" value="0.05"/>
+            <param name="t2" value="0.05"/>
+            <param name="alpha" value="0"/>
+            <output name="normalized_data" file="normalized_data.csv"/>
+        </test>
+        <test><!-- TEST 5 -->
+            <param name="input_choice" value="2"/>
+            <param name="data" value="feature_table.tsv" ftype="tsv"/>
+            <param name="metadata" value="metadata.tsv" ftype="tsv"/>
+            <param name="mode" value="batchwise"/>
+            <param name="wavelet_filter" value="d"/>
+            <param name="filter_length" value="2"/>
+            <param name="k" value="20"/>
+            <param name="t" value="0.05"/>
+            <param name="t2" value="0.05"/>
+            <param name="alpha" value="0"/>
+            <output name="normalized_data" file="normalized_data.tsv"/>
+        </test>
+        <test><!-- TEST 6 -->
+            <param name="input_choice" value="2"/>
+            <param name="data" value="feature_table.parquet" ftype="parquet"/>
+            <param name="metadata" value="metadata.csv" ftype="csv"/>
+            <param name="mode" value="batchwise"/>
+            <param name="wavelet_filter" value="d"/>
+            <param name="filter_length" value="2"/>
+            <param name="k" value="20"/>
+            <param name="t" value="0.05"/>
+            <param name="t2" value="0.05"/>
+            <param name="alpha" value="0"/>
+            <output name="normalized_data" file="normalized_data.parquet" compare="sim_size" delta="200"/>
+        </test>
         <!-- The following test has different results on three platform I've tried -->
         <!-- <test>
             <param name="data" value="input_data_nobatch.csv" ftype="csv"/>
--- a/waveica_wrapper.R	Wed Mar 23 11:35:30 2022 +0000
+++ b/waveica_wrapper.R	Thu Sep 29 15:21:04 2022 +0000
@@ -1,4 +1,29 @@
-waveica <- function(data,
+read_file <- function(file, metadata, ft_ext, mt_ext) {
+  # Load the feature table; it is returned as-is when `metadata` is NA.
+  features <- read_data(file, ft_ext)
+  if (is.na(metadata)) {
+    return(features)
+  }
+  # Metadata is the first (x) table of the merge; rows match on "sampleName".
+  sample_info <- read_data(metadata, mt_ext)
+  merge(sample_info, features, by = "sampleName")
+}
+
+read_data <- function(file, ext) {
+  # Read one table from `file`, choosing the reader by the extension `ext`
+  # ("csv", "tsv" or "parquet"). Any other value raises an error instead of
+  # being silently parsed as parquet. Returns the table that was read.
+  switch(ext,
+    csv = read.csv(file, header = TRUE),
+    tsv = read.csv(file, header = TRUE, sep = "\t"),
+    parquet = arrow::read_parquet(file),
+    stop("Unsupported file extension: ", ext, call. = FALSE)
+  )
+}
+
+waveica <- function(file,
+                    metadata = NA,
+                    ext,
                     wavelet_filter,
                     wavelet_length,
                     k,
@@ -8,7 +33,12 @@
                     exclude_blanks) {
 
   # get input from the Galaxy, preprocess data
-  data <- read.csv(data, header = TRUE)
+  ext <- strsplit(x = ext, split = "\\,")[[1]]
+
+  ft_ext <- ext[1]
+  mt_ext <- ext[2]
+
+  data <- read_file(file, metadata, ft_ext, mt_ext)
 
   required_columns <- c("sampleName", "class", "sampleType", "injectionOrder", "batch")
   verify_input_dataframe(data, required_columns)
@@ -43,8 +73,9 @@
   return(data)
 }
 
-
-waveica_singlebatch <- function(data,
+waveica_singlebatch <- function(file,
+                                metadata = NA,
+                                ext,
                                 wavelet_filter,
                                 wavelet_length,
                                 k,
@@ -53,7 +84,12 @@
                                 exclude_blanks) {
 
   # get input from the Galaxy, preprocess data
-  data <- read.csv(data, header = TRUE)
+  ext <- strsplit(x = ext, split = "\\,")[[1]]
+
+  ft_ext <- ext[1]
+  mt_ext <- ext[2]
+
+  data <- read_file(file, metadata, ft_ext, mt_ext)
 
   required_columns <- c("sampleName", "class", "sampleType", "injectionOrder")
   optional_columns <- c("batch")
@@ -147,9 +183,13 @@
   }
 }
 
-
-# Store output of WaveICA in a tsv file
-store_data <- function(data, output) {
-  write.table(data, file = output, sep = "\t", row.names = FALSE, quote = FALSE)
+store_data <- function(data, output, ext) {
+  # Write `data` to `output` as `ext`; unknown extensions are an error.
+  switch(ext,
+    csv = write.csv(data, file = output, row.names = FALSE, quote = FALSE),
+    tsv = write.table(data, file = output, sep = "\t", row.names = FALSE, quote = FALSE),
+    parquet = arrow::write_parquet(data, sink = output),
+    stop("Unsupported output extension: ", ext, call. = FALSE)
+  )
   cat("Normalization has been completed.\n")
 }