view macros.xml @ 8:bf32ae95a06f draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 44e9371974b176490222f96d532df2421571cbaa
author recetox
date Tue, 06 Aug 2024 14:27:48 +0000
parents 1a2aeb8137bf
children
line wrap: on
line source

<macros>
    <!-- Version string published under the @TOOL_VERSION@ token; presumably consumed by the tool wrappers that import this macro file (confirm against the importing tools). -->
    <token name="@TOOL_VERSION@">0.2.0</token>

    <!-- Authorship metadata: two individual contributors followed by the maintaining organization. -->
    <xml name="creator">
        <creator>
            <person givenName="Maksym" familyName="Skoryk"
                    identifier="0000-0003-2056-8018"
                    url="https://github.com/maximskorik"/>
            <person givenName="Zargham" familyName="Ahmad"
                    identifier="0000-0002-6096-224X"
                    url="https://github.com/zargham-ahmad"/>
            <organization name="RECETOX MUNI"
                          email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
                          url="https://www.recetox.muni.cz/"/>
        </creator>
    </xml>

    <!-- Cross-reference linking the tool to its bio.tools entry. -->
    <xml name="annotation">
        <xrefs>
            <xref type="bio.tools">waveica</xref>
        </xrefs>
    </xml>

    <!-- Input dataset selector for the feature table; accepts csv, tsv or parquet datasets. -->
    <xml name="input_data">
        <param name="data"
               type="data"
               format="csv,tsv,parquet"
               label="Feature table"
               help=""/>
    </xml>
    <!--
        General decomposition parameters.
        k:     number of components; bounded below by 1 because a decomposition
               needs at least one component.
        alpha: trade-off value; bounded to the 0..1 interval stated in its help
               text so out-of-range input is rejected in the form instead of
               failing at run time.
    -->
    <xml name="general_parameters">
        <param type="integer" value="20" min="1" name="k" label="Number of components to decompose" help="maximal component that ICA decomposes"/>
        <param type="float" value="0" min="0" max="1" name="alpha" label="Alpha" help="trade-off value between the independence of samples (temporal ICA) and variables (spatial ICA), should be between 0 and 1"/>
    </xml>
    <!--
        Association thresholds for the batch-wise correction.
        Both values are bounded to the 0..1 interval stated in their help text.
        Each help attribute is kept on a single physical line: a line break inside
        an attribute value is normalised to spaces, which would leak the source
        indentation into the displayed help.
    -->
    <xml name="batchwise_parameters">
        <param type="float" value="0.05" min="0" max="1" name="t" label="Batch-association threshold" help="threshold to consider a component associated with the batch, should be between 0 and 1"/>
        <param type="float" value="0.05" min="0" max="1" name="t2" label="Group-association threshold" help="threshold to consider a component associated with the group, should be between 0 and 1"/>
    </xml>
    <!--
        Cutoff for the single-batch correction, bounded to the 0..1 interval
        stated in its help text so invalid values are rejected in the form.
    -->
    <xml name="singlebatch_parameters">
        <param type="float" value="0" min="0" max="1" name="cutoff" label="Cutoff" help="threshold of the variation explained by the injection order for independent components, should be between 0 and 1"/>
    </xml>
    <!-- Boolean switch, off by default, that renders as TRUE or FALSE. -->
    <xml name="exclude_blanks">
        <param name="exclude_blanks"
               type="boolean"
               checked="false"
               truevalue="TRUE"
               falsevalue="FALSE"
               label="Remove blanks"
               help="Excludes blank samples from the output"/>
    </xml>
    <!--
        Wavelet filter selection.
        The outer select picks the wavelet family; each branch of the conditional
        then offers only the filter lengths listed for that family. Where no
        option is flagged as selected, the first listed length is the default.
    -->
    <xml name="wf">
        <conditional name="wf">
            <param type="select" name="wavelet_filter" label="Wavelet transform filter" help="wavelet function and filter length [1] (see footnotes for more details)">
                <option value="d" selected="true">Daubechies</option>
                <option value="la">Least Asymmetric</option>
                <option value="bl">Best Localized</option>
                <option value="c">Coiflet</option>
            </param>
            <when value="d">
                <param name="wavelet_length" type="select" label="filter length">
                    <option value="2" selected="true">2</option>
                    <option value="4">4</option>
                    <option value="6">6</option>
                    <option value="8">8</option>
                    <option value="10">10</option>
                    <option value="12">12</option>
                    <option value="14">14</option>
                    <option value="16">16</option>
                    <option value="18">18</option>
                    <option value="20">20</option>
                </param>
            </when>
            <when value="la">
                <param name="wavelet_length" type="select" label="filter length">
                    <option value="8">8</option>
                    <option value="10">10</option>
                    <option value="12">12</option>
                    <option value="14">14</option>
                    <option value="16">16</option>
                    <option value="18">18</option>
                    <option value="20">20</option>
                </param>
            </when>
            <when value="bl">
                <param name="wavelet_length" type="select" label="filter length">
                    <option value="14">14</option>
                    <option value="18">18</option>
                    <option value="20">20</option>
                </param>
            </when>
            <when value="c">
                <param name="wavelet_length" type="select" label="filter length">
                    <option value="6">6</option>
                    <option value="12">12</option>
                    <option value="18">18</option>
                    <option value="24">24</option>
                    <option value="30">30</option>
                </param>
            </when>
        </conditional>
    </xml>
    <!-- Boolean switch, off by default, that renders as TRUE or FALSE; the outputs macro filters the metadata dataset on it. -->
    <xml name="split_output">
        <param name="keep_two_output"
               type="boolean"
               checked="false"
               truevalue="TRUE"
               falsevalue="FALSE"
               label="Output metadata and data matrix as separate files"
               help="Keep two output files, one being the data matrix (feature table) and the second being the  metadata table."/>
    </xml>

    <!--
        Output datasets.
        normalized_data: always declared.
        metadata:        declared only when the keep_two_output filter passes.
        Both default to tabular and switch to parquet when the "data" input
        dataset has the parquet extension.
    -->
    <xml name="outputs">
        <outputs>
            <data name="normalized_data"
                  format="tabular"
                  label="Normalized table of ${on_string}">
                <change_format>
                    <when input_dataset="data" attribute="ext" value="parquet" format="parquet"/>
                </change_format>
            </data>
            <data name="metadata"
                  format="tabular"
                  label="Metadata table of ${on_string}">
                <filter>keep_two_output</filter>
                <change_format>
                    <when input_dataset="data" attribute="ext" value="parquet" format="parquet"/>
                </change_format>
            </data>
        </outputs>
    </xml>

    <token name="@HELP@"><![CDATA[
        **Description**

        Removal of batch effects for large-scale untargeted metabolomics data based on wavelet analysis and independent
        component analysis. The WaveICA method uses the time trend of samples over the injection order, decomposes the
        original data into new multi-scale features, extracts and removes the batch effect resulting in normalized
        intensities across samples.

        The input is an intensity-by-feature table with metadata in the following format:

        +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+
        | sampleName    | class  | sampleType | injectionOrder | batch | M85T34     | M86T41       | M86T518     | M86T539     | ... |
        +===============+========+============+================+=======+============+==============+=============+=============+=====+
        | VT_160120_002 | sample | sample     | 1              | 1     | 228520.064 | 35646729.215 | 2386896.979 | 1026645.836 | ... |
        +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+
        | QC1           | sample | QC         | 2              | 1     | 90217.384  | 35735702.457 | 2456290.696 | 1089246.460 | ... |
        +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+
        | ...           | ...    | ...        | ...            | ...   | ...        | ...          | ...         | ...         | ... |
        +---------------+--------+------------+----------------+-------+------------+--------------+-------------+-------------+-----+


        + The required columns are **sampleName**, **class**, **sampleType**, **injectionOrder**, and the **features** that you want to normalize.
        + The **batch** column is required if batch correction mode is **Multiple batches** and optional otherwise.
        + The presence of any additional columns (except features) will result in incorrect batch correction or job failure.
        + The input table must not contain missing values. Missing intensities must be filled with 0.
        + **sampleType** column accepts three possible values: [QC, sample, blank] (case insensitive).
        + **class** column is used to denote a biological group of a sample (e.g., positive/negative species). The column accepts any values.
        + The **output** is the same table with corrected feature intensities.

        .. rubric:: **Footnotes**
        .. [1] For details on wavelet-filter parameters, refer to R `wavelets::wt.filter <https://www.rdocumentation.org/packages/wavelets/versions/0.3-0.2/topics/wt.filter>`_.
        .. [2] When using 'Multiple batches', please cite the WaveICA (2019) paper; otherwise, cite the WaveICA 2.0 (2021) paper.
    ]]>
    </token>
</macros>