view fastspar.xml @ 0:e9231fb122e9 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastspar commit 0e305d21d0634a1788b9105ec4d0ab1c2da62359
author iuc
date Thu, 19 Jun 2025 21:51:32 +0000
parents
children
line wrap: on
line source

<tool id="fastspar" name="FastSpar" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Galaxy wrapper around the fastspar executable: takes one OTU count table and
         writes a correlation matrix and a covariance matrix. -->
    <description>
        correlation estimation for compositional data
    </description>
    <!-- The version/profile tokens in the tool tag and the macros expanded below are
         expected to be provided by macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Single fastspar invocation; both output matrices are written directly to the Galaxy output datasets.
        fastspar
            --otu_table '$otu_table'
            --iterations $iterations
            --exclude_iterations $exclude_iterations
            --threshold $threshold
            --seed $seed
            --correlation '$correlation'
            --covariance '$covariance'
            ## The backslash stops Cheetah from expanding the variable, so the shell resolves GALAXY_SLOTS (falling back to 1 when it is unset).
            --threads \${GALAXY_SLOTS:-1}
            ## Skip warning prompt and continue analysis even if the input contains OTUs with just one permutation.
            --yes
    ]]></command>
    <inputs>
        <param argument="--otu_table" type="data" format="tabular" label="Input OTU table"
               help="The table must contain absolute OTU counts in plain tabular (TSV) format, with OTUs as rows and samples as columns. Do not include any metadata rows or columns."/>
        <!-- NOTE(review): iterations, exclude_iterations and threshold are used by the command
             but not declared in this file; presumably this macro defines them (confirm in macros.xml). -->
        <expand macro="fastspar_tool_parameters"/>
        <param argument="--seed" type="integer" value="1" label="Random number seed"/>
    </inputs>
    <outputs>
        <data name="correlation" format="tabular" label="${tool.name} on ${on_string}: median_correlation.tsv"/>
        <data name="covariance" format="tabular" label="${tool.name} on ${on_string}: median_covariance.tsv"/>
    </outputs>
    <tests>
        <!-- Test 1: default parameters; both outputs are compared line by line against stored expected files. -->
        <test expect_num_outputs="2">
            <param name="otu_table" ftype="tabular" value="fake_data.tsv"/>
            <output name="correlation" file="fake_data_cor.tsv" compare="diff"/>
            <output name="covariance" file="fake_data_cov.tsv" compare="diff"/>
        </test>
        <!-- Test 2: non-default exclusion settings; only the shape and selected content of the outputs are asserted. -->
        <test expect_num_outputs="2">
            <param name="otu_table" ftype="tabular" value="fake_data.tsv"/>
            <param name="exclude_iterations" value="20"/>
            <param name="threshold" value="0.2"/>
            <output name="correlation" ftype="tabular">
                <assert_contents>
                    <has_n_columns n="51"/>
                    <has_text text="1.0000"/>
                </assert_contents>
            </output>
            <output name="covariance" ftype="tabular">
                <assert_contents>
                    <has_n_columns n="51"/>
                    <has_text text="OTU ID"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
What it does
============

FastSpar is a C++ implementation of the SparCC algorithm for estimating correlations from compositional data.
This tool performs the **initial correlation and covariance matrix estimation** as the first step in the FastSpar pipeline.
**If you also want to estimate p-values** you might want to use ``fastspar_pvalues`` with "Recalculate the correlation matrix".

Required Inputs
===============

- **OTU table** (TSV format): Contains absolute OTU counts (not relative abundances). Must be a plain tabular file with samples in columns and OTUs in rows. Metadata is not supported.

Main Parameters
===============

- **Iterations** (``--iterations``): Number of correlation estimation rounds. More iterations improve stability.
- **Exclude iterations** (``--exclude_iterations``): Number of times highly correlated OTU pairs are removed.
- **Correlation threshold** (``--threshold``): Correlation strength above which to exclude OTU pairs.
- **Seed** (``--seed``): Random seed for reproducibility.

Main Features
=============

- Efficient and fast computation of sparse correlations.
- Customizable exclusion and thresholding strategy.
- Designed to handle compositional count data from microbiome studies.

Generated Outputs
=================

- ``median_correlation.tsv``: Correlation matrix between all OTUs.
- ``median_covariance.tsv``: Covariance matrix between all OTUs.

Additional Resources
====================

- FastSpar GitHub: https://github.com/scwatts/fastspar

For a complete FastSpar analysis, follow up with:

1. ``fastspar_pvalues``: Estimate empirical p-values from bootstrap correlations.
2. ``fastspar_reduce``: Filter correlation and p-value matrices to produce sparse networks.
    ]]></help>
    <expand macro="citations"/>
</tool>