Mercurial > repos > iuc > fastspar_pvalues

diff fastspar_pvalues.xml @ 0:7ff1304e12aa draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastspar commit 0e305d21d0634a1788b9105ec4d0ab1c2da62359
author: iuc
date: Thu, 19 Jun 2025 21:51:18 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastspar_pvalues.xml	Thu Jun 19 21:51:18 2025 +0000
@@ -0,0 +1,158 @@
+<tool id="fastspar_pvalues" name="FastSpar: estimate p-values" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Bootstrap-based estimation of p-values from FastSpar correlations</description>
+    <!-- The @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens are not defined in this file;
+         presumably they are provided by the imported macros.xml (verify there). -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <!-- The macros expanded below are defined outside this file. -->
+    <expand macro="biotools"/>
+    <expand macro="requirements_pvalues"/>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Step 1 (optional): compute the correlation matrix to be tested from the input OTU table.
+        ## When an existing matrix is selected, that file is tested instead.
+        #if $correlation.select == "new"
+            fastspar
+                --otu_table '$otu_table'
+                --iterations $iterations
+                --exclude_iterations $exclude_iterations
+                --threshold $threshold
+                --seed $seed
+                --correlation '$output_correlation'
+                --covariance '$output_covariance'
+                --threads \${GALAXY_SLOTS:-1}
+                ## Skip warning prompt and continue analysis even if the input contains OTUs with just one permutation.
+                --yes &&
+            #set $correlation_file = $output_correlation
+        #else
+            #set $correlation_file = $correlation.input_file
+        #end if
+        
+        ## Step 2: write the bootstrap replicates of the OTU table below bootstrap_counts/.
+        mkdir bootstrap_counts
+     && fastspar_bootstrap
+            --otu_table '$otu_table'
+            --number $number
+            --prefix bootstrap_counts/data
+            --seed $seed
+            --threads \${GALAXY_SLOTS:-1}
+        
+        ## Step 3: run fastspar once per replicate, with at most GALAXY_SLOTS jobs in parallel.
+        ## {} is the path of a replicate and {/} its basename (GNU parallel replacement strings).
+     && mkdir bootstrap_correlation
+     && parallel
+            --max-procs \${GALAXY_SLOTS:-1}
+            fastspar
+                --otu_table {}
+                --correlation bootstrap_correlation/cor_{/}
+                --covariance bootstrap_correlation/cov_{/}
+                --iterations $iterations
+                --exclude_iterations $exclude_iterations
+                --threshold $threshold
+                --seed $seed
+                ## Resampled tables may also contain OTUs with just one permutation. Nobody can answer
+                ## the warning prompt inside a batch job, so skip it here as in the run of step 1.
+                --yes
+            ::: bootstrap_counts/*
+        
+        ## Step 4: estimate the p-values of the tested matrix from the replicate correlations
+        ## (the files below bootstrap_correlation/ whose names start with cor_data_).
+     && fastspar_pvalues
+            --otu_table '$otu_table'
+            --correlation '$correlation_file'
+            --prefix bootstrap_correlation/cor_data_
+            --permutations $number
+            $pseudo
+            --threads \${GALAXY_SLOTS:-1}
+            --outfile '$pvalues'
+    ]]></command>
+    <inputs>
+        <!-- Count table that is bootstrapped and correlated by the command. -->
+        <param argument="--otu_table"
+               type="data"
+               format="tabular"
+               label="Input OTU table"
+               help="The table must contain absolute OTU counts in plain tabular (TSV) format, with OTUs as rows and samples as columns. Do not include any metadata rows or columns."/>
+        <!-- Source of the tested correlation matrix: computed in this job ("new") or taken from the history ("original"). -->
+        <conditional name="correlation">
+            <param name="select"
+                   type="select"
+                   label="Tested correlation matrix"
+                   help="For meaningful p-values, the parameters used during bootstrapped correlation estimation should be identical to those used for the FastSpar run which produced the correlation matrix. &lt;br&gt;For your convenience you can choose to calculate the correlation matrix here. In that case the seed used for the calculation is the same one used for generating the bootstrapped samples.">
+                <option value="new">Recalculate the correlation matrix</option>
+                <option value="original">Use an existing correlation matrix</option>
+            </param>
+            <!-- Recalculation needs no additional input. -->
+            <when value="new"/>
+            <when value="original">
+                <param name="input_file"
+                       type="data"
+                       format="tabular"
+                       argument="--correlation"
+                       label="Correlation table"
+                       help="The correlation matrix generated by the original FastSpar analysis."/>
+            </when>
+        </conditional>
+        <!-- Number of bootstrap replicates; the command also passes it to fastspar_pvalues as the permutation count. -->
+        <param argument="--number"
+               type="integer"
+               min="10"
+               max="10000"
+               value="1000"
+               label="Number of bootstrap samples"
+               help="Recommended minimum: 1000 bootstrap samples for robust estimation."/>
+        <!-- Defined outside this file; presumably supplies the iterations, exclude_iterations and threshold
+             parameters referenced by the command (verify in macros.xml). -->
+        <expand macro="fastspar_tool_parameters"/>
+        <!-- One seed is shared by every fastspar and fastspar_bootstrap call of the command. -->
+        <param argument="--seed"
+               type="integer"
+               value="1"
+               label="Seed to ensure reproducibility of bootstrapped samples."/>
+        <!-- Boolean flag: contributes the pseudo option to the fastspar_pvalues call when checked, nothing otherwise. -->
+        <param argument="--pseudo"
+               type="boolean"
+               truevalue="--pseudo"
+               falsevalue=""
+               label="Use pseudo p-values"
+               help="If selected, pseudo p-values are calculated instead of exact p-values. This can provide faster estimates but may be less precise."/>
+    </inputs>
+    <outputs>
+        <!-- The correlation and covariance matrices are only kept when the correlation matrix is recalculated. -->
+        <data name="output_correlation"
+              format="tabular"
+              label="${tool.name} on ${on_string}: median_correlation.tsv">
+            <filter>correlation['select'] == "new"</filter>
+        </data>
+        <data name="output_covariance"
+              format="tabular"
+              label="${tool.name} on ${on_string}: median_covariance.tsv">
+            <filter>correlation['select'] == "new"</filter>
+        </data>
+        <!-- The p-value table is always produced. -->
+        <data name="pvalues"
+              format="tabular"
+              label="${tool.name} on ${on_string}: pvalues.tsv"/>
+    </outputs>
+    <tests>
+        <!-- Case 1: an existing correlation matrix is supplied, so the p-value table is the only output. -->
+        <test expect_num_outputs="1">
+            <param name="otu_table" value="fake_data.tsv" ftype="tabular"/>
+            <conditional name="correlation">
+                <param name="select" value="original"/>
+                <param name="input_file" value="fake_data_cor.tsv" ftype="tabular"/>
+            </conditional>
+            <param name="number" value="10"/>
+            <output name="pvalues" file="fake_pvalues.tsv" compare="diff"/>
+        </test>
+        <!-- Case 2: the correlation matrix is recalculated, so correlation, covariance and p-values are all checked. -->
+        <test expect_num_outputs="3">
+            <param name="otu_table" value="fake_data.tsv" ftype="tabular"/>
+            <conditional name="correlation">
+                <param name="select" value="new"/>
+            </conditional>
+            <param name="number" value="10"/>
+            <output name="output_correlation" file="fake_data_cor.tsv" compare="diff"/>
+            <output name="output_covariance" file="fake_data_cov.tsv" compare="diff"/>
+            <output name="pvalues" file="fake_pvalues.tsv" compare="diff"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+What it does
+============
+
+This tool estimates **empirical p-values** for correlation values generated by FastSpar. It uses a **bootstrap-based permutation approach** to assess the statistical significance of observed correlations.
+
+You can choose to recalculate the correlation matrix with the same parameters or use an existing correlation matrix.
+
+How it works
+============
+
+1. Generates multiple bootstrapped versions of the OTU table.
+2. Runs FastSpar on each bootstrap replicate.
+3. Compares bootstrapped correlations to the original correlation matrix to calculate empirical p-values.
+
+Required Inputs
+===============
+
+- **OTU table**: TSV file with absolute counts (no metadata).
+- **Correlation table** (only needed when "Use an existing correlation matrix" is selected): The correlation matrix produced by the original FastSpar run.
+- **Bootstrap samples**: Number of bootstrap replicates (≥1000 recommended).
+
+Important Parameters
+====================
+
+- **Iterations**: Must match the number used in the original FastSpar run.
+- **Exclude Iterations** and **Threshold**: Should also match the original settings, if used.
+- **Seed**: Seed for the random number generator (default: 1). Keep it fixed to obtain reproducible results.
+- **Pseudo**: Choose whether to calculate pseudo p-values instead of exact values.
+
+IMPORTANT
+=========
+
+For meaningful p-values, the parameters used during bootstrapped correlation estimation (**iterations, exclude iterations, threshold**) should be identical to those used in the original FastSpar run.
+
+Output
+======
+
+- ``pvalues.tsv``: A table of empirical p-values for all pairwise correlations.
+
+When "Recalculate the correlation matrix" is selected, the tool will also output:
+
+- ``median_correlation.tsv``: Correlation matrix between all OTUs.
+- ``median_covariance.tsv``: Covariance matrix between all OTUs.
+
+Additional Resources
+====================
+
+- FastSpar GitHub: https://github.com/scwatts/fastspar
+    ]]></help>
+    <!-- The "citations" macro is defined outside this file; presumably it supplies the citation entries of the tool. -->
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file