view mixomics_blocksplsda.xml @ 1:e93350dc99f1 draft

"planemo upload for repository https://gitlab.com/bilille/galaxy-viscorvar commit 1eda261d4fe137d6e8806b0c6af7eaf12d11ac95"
author ppericard
date Thu, 15 Oct 2020 12:22:25 +0000
parents d0b77b926863
children c8533e9298e5
line wrap: on
line source

<tool id="mixomics_blocksplsda" name="mixOmics block.splsda" version="6.12+galaxy0" profile="16.04" workflow_compatible="true">

    <description>performs N-integration and feature selection with Projection to Latent Structures models (PLS) with sparse Discriminant Analysis</description>

    <!-- Pinned package dependencies for the R script run by the command section. -->
    <requirements>
        <requirement type="package" version="6.12">bioconductor-mixomics</requirement>
        <requirement type="package" version="2.0">r-argparse</requirement>
    </requirements>

<!--     <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio> -->

    <command detect_errors="aggressive">
        <![CDATA[
        ## Builds the Rscript call: one "--block" group per repeat entry
        ## (block name, keep, data matrix path, variable metadata path),
        ## followed by the global options. Per-block results are written to
        ## ./outdir (picked up by the blocks_output collection) and the R
        ## workspace is written to the rdata_out dataset.
        ##
        ## Every free-text value and every path is single-quoted so that
        ## spaces or shell metacharacters cannot split or alter the command.
        mkdir outdir
        && Rscript
        '${__tool_directory__}/mixomics_blocksplsda_script.R'
        #for $b in $blocks
            ## Use the user-supplied block name; fall back to the dataset
            ## name when the field is empty or whitespace only.
            #if str($b.block_name).strip()
                #set $block_label = str($b.block_name).strip()
            #else
                #set $block_label = str($b.data_matrix.name)
            #end if
            ## Dataset names are free text: neutralise single quotes so the
            ## label cannot terminate the quoting applied below.
            #set $block_label = $block_label.replace("'", "_")
            --block
            '${block_label}'
            ${b.keep}
            '${b.data_matrix}'
            ## Renders as 'None' when no variable metadata file is supplied.
            '${b.variable_metadata}'
        #end for
        --sample_metadata_in '${sample_metadata_in}'
        --sample_description_col ${sample_description_col}
        --ncomp ${adv.ncomp}
        ## Boolean flags expand to the flag itself or to an empty string,
        ## so they must stay unquoted.
        ${correlation}
        ##--scheme ${adv.scheme}
        ##--mode ${adv.mode}
        --maxiter ${adv.maxiter}
        ##${adv.scale}
        --scale ## always activated hidden param
        ${adv.check_missing_values}
        ##--init ${adv.init}
        ##--tol ${adv.tol}
        ##${adv.nearzerovar}
        --rdata_out '${rdata_out}'
        ##--sample_metadata_out ${sample_metadata_out}
        --variable_metadata_outdir outdir
        ]]>
    </command>

    <inputs>
        <!-- One entry per omics block; each entry becomes one block argument group on the command line. -->
        <repeat name="blocks" title="Blocks">
            <!-- Optional label; when left empty the command falls back to the data matrix dataset name. -->
            <param name="block_name" type="text" label="Block name" />
            <param name="keep" type="integer" value="0" min="0"
                   label="Number of variables to select for each component"
                   help="[keep] Estimation of the number of variables in the block correlated with variables from the other blocks and correlated with response variables. If set to 0, all variables will be selected." />
            <param name="data_matrix" type="data" format="tabular"
                   label="Data matrix"
                   help="Block data in tabular format (rows = variables, columns = samples). The first column contains the variables names and the first row contains the samples names. Samples names must be in the same order for all blocks and the sample metadata (transposed). The data must not contain missing values." />
            <param name="variable_metadata" type="data" format="tabular" optional="true"
                   label="Variables metadata [optional]"
                   help="Variables metadata in tabular format (rows = variables). The first column contains the variables names. The first row contains the metadata column names. The number of rows in the metadata file must be the same as the number of rows in the block data file, and the variables need to be in the same order. If a metadata file is provided, block.splsda output will be appended as new columns, otherwise a new file will be created." />
        </repeat>
        <!-- Inputs shared by all blocks. -->
        <param name="sample_metadata_in" type="data" format="tabular"
               label="Samples metadata"
               help="Samples metadata in tabular format (rows = samples). The first column contains the sample names. The first row contains the metadata column names. Samples names must be in the same order (transposed) as in all the blocks. One of the columns (the last by default) must contain the samples groups for integration." />
        <param name="sample_description_col" type="integer" value="0" min="0"
               label="Samples groups column number"
               help="Column from the samples metadata file containing samples groups. If set to 0, the last column will be used." />
        <param name="correlation" type="boolean" truevalue="--correlation"
               falsevalue="" checked="false"
               label="Correlation between all blocks"
               help="[design] If set to Yes, data integration will take into account correlations between all the blocks (design matrix with diagonal coefficients set to 0 and the rest of the coefficients set to 1)." />
        <section name="adv" title="Advanced Options" expanded="false">
            <param name="ncomp" type="integer" value="2" min="1"
                   label="Number of components to include in the model"
                   help="[ncomp] Number of new variables (components) computed by the data integration." />
<!--             <param name="scheme" type="select" label="Scheme">
                <option value="horst" selected="true">horst</option>
                <option value="factorial"            >factorial</option>
                <option value="centroid"             >centroid</option>
            </param>
            <param name="mode" type="select" label="Mode">
                <option value="regression" selected="true">regression</option>
                <option value="canonical"                 >canonical</option>
                <option value="invariant"                 >invariant</option>
                <option value="classic"                   >classic</option>
            </param> -->
            <param name="maxiter" type="integer" value="100" min="1"
                   label="Maximum number of iterations"
                   help="[max.iter] Maximum number of iterations performed by block.splsda." />
<!--            <param name="scale" type="boolean" truevalue="-\-scale" falsevalue="" checked="true"
                   label="Scale"
                   help="if checked, each block is standardized to zero means and unit variances" /> -->
            <!-- The flag value needs no backslash escaping here; that workaround is only required
                 inside XML comments, where two consecutive hyphens are forbidden. -->
            <param name="check_missing_values" type="boolean" truevalue="--check_missing_values" falsevalue="" checked="true"
                   label="Check for missing values"
                   help="will raise an error if missing values are found in data matrices" />
<!--             <param name="init" type="select" label="Init">
                <option value="svd" selected="true">svd</option>
                <option value="svd.single"         >svd.single</option>
            </param>
            <param name="tol" type="float" value="1e-06" min="0"
                   label="Convergence stopping value"
                   help="[tol]" />
            <param name="nearzerovar" type="boolean" truevalue="-\-nearzerovar" falsevalue="" checked="true"
                   label="Should be set to TRUE in particular for data with many zero values" /> -->
        </section>
    </inputs>

    <outputs>
        <!-- R workspace written by the script to the path passed as rdata_out. -->
        <data name="rdata_out"
              format="rdata"
              label="${tool.name}_output.rdata" />
        <!-- <data name="sample_metadata_out" format="tabular"
              label="${tool.name}_${sample_metadata_in.name}" /> -->
        <!-- One tabular element per .tsv file found in outdir; the element
             name is the file name without its .tsv extension. -->
        <collection name="blocks_output"
                    type="list"
                    label="${tool.name}_blocks_output">
            <discover_datasets directory="outdir"
                               format="tabular"
                               pattern="(?P&lt;designation&gt;.+)\.tsv" />
        </collection>
    </outputs>

    <tests>
        <!-- Two-block run that leaves every other parameter at its default value. -->
        <test>
            <repeat name="blocks">
                <param name="block_name" value="Block1" />
                <param name="data_matrix" value="in_block1_data.tabular" />
            </repeat>
            <repeat name="blocks">
                <param name="block_name" value="Block2" />
                <param name="data_matrix" value="in_block2_data.tabular" />
            </repeat>
            <param name="sample_metadata_in" value="in_sample_meta.tabular" />
            <output name="rdata_out" value="out_rdata.rdata" />
            <!-- Disabled together with the matching output declaration; the tool
                 currently defines no output with this name.
            <output name="sample_metadata_out" value="out_sample_meta.tabular" /> -->
            <!-- Checks that the discovered per-block collection is produced.
                 TODO: add element checks once expected files exist in test-data. -->
            <output_collection name="blocks_output" type="list" />
        </test>
    </tests>

    <help>
        <![CDATA[
.. class:: infomark

**Authors** Pierre Pericard (pierre.pericard@pasteur-lille.fr)

---------------------------------------------------

.. class:: infomark

**Please cite**

Rohart F, Gautier B, Singh A, Lê Cao KA (2017) mixOmics: An R package for ‘omics feature selection and multiple data integration.
PLOS Computational Biology 13(11): e1005752. https://doi.org/10.1371/journal.pcbi.1005752

---------------------------------------------------

======================
mixOmics block.splsda
======================

-----------
Description
-----------

The block.splsda function is part of the mixOmics package for exploration and integration of Omics datasets.
Performs N-integration and feature selection with Projection to Latent Structures models (PLS) with sparse Discriminant Analysis.

-----------
Input files
-----------

For each block (min 2 blocks):
==============================

+------------------------------+------------+
| Parameter : num + label      |   Format   |
+==============================+============+
| 1 : Data matrix              |   tabular  |
+------------------------------+------------+
| 2 : [opt] Variables metadata |   tabular  |
+------------------------------+------------+

Variables metadata files are optional.
If a file is provided, output metadata will be appended to the input file, otherwise a new output file will be created.

1. Data matrix format
    * Rows = variables, Columns = samples
    * First row = samples name. MUST be the same and in the same order in every block as well as in the sample metadata file (transposed)
    * First column = variables name

2. Variables metadata format
    * Rows = variables, Columns = metadata
    * First row = metadata column names
    * First column = variables names. MUST be the same and in the same order as in the associated data matrix

Global input files:
===================

+-----------------------------+------------+
| Parameter : num + label     |   Format   |
+=============================+============+
| 1 : Samples metadata        |   tabular  |
+-----------------------------+------------+

By default, the last column of the samples metadata matrix will be used as samples description factors.
If that is not the case, the column number can be entered in the `Samples groups column number` parameter.

1. Samples metadata format
    * Rows = samples, Columns = metadata
    * First row = metadata column names
    * First column = sample names. These names must be identical (transposed) and in the same order as in the blocks data matrices

----------
Parameters
----------

For each block (min 2 blocks):
==============================

Block name
    Name of the block. If not provided, this will default to the input filename

Number of variables to select for each component
    If set to 0 (default), all variables will be considered in the model

Global parameters:
==================

Samples groups column number

Number of components to include in the model

Correlation between all blocks

Advanced options:
=================

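Scheme
    Not configurable in this version of the tool

Mode
    Not configurable in this version of the tool

Maximum number of iterations

Scale
    Always enabled in this version of the tool

Init
    Not configurable in this version of the tool

Convergence stopping value (tol)
    Not configurable in this version of the tool

Near zero var
    Not configurable in this version of the tool

Check for missing values
    If enabled (default), an error is raised when missing values are found in the data matrices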

------------
Output files
------------

mixomics_blocksplsda_output.rdata
    | rData output
    | Contains the `mixomics_result` R object containing the result of the block.splsda function

mixomics_blocksplsda_{input_sample_metadata_name}
    | tabular output
    | Identical to the input Sample metadata file with appended columns from the result of block.splsda function
    | Currently disabled: this output is not produced by this version of the tool

mixomics_blocksplsda_blocks_output
    A collection with the variable metadata output files (mixomics_blocksplsda_block_{block name}_variable_metadata) for each input block

        ]]>
    </help>

    <!-- mixOmics reference paper; same DOI as the "Please cite" entry in the help section. -->
    <citations>
        <citation type="doi">10.1371/journal.pcbi.1005752</citation>
    </citations>

</tool>