<!--
view pca.xml @ 9:945a53c248de draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 3c1e6c72303cfd8a5fd014734f18402b97f8ecb5
author bgruening
date Fri, 22 Sep 2023 16:52:16 +0000
parents c16818ce0424
children
line wrap: on
line source
-->

<tool id="sklearn_pca" name="Principal component analysis" version="@VERSION@" profile="@PROFILE@">
    <!-- Short description of the tool. -->
    <description>with scikit-learn</description>
    <!-- Shared macro file. NOTE(review): the @VERSION@ and @PROFILE@ tokens and the macros
         expanded in this tool are presumably defined there; it is not visible from this file. -->
    <macros>
        <import>main_macros.xml</import>
    </macros>
    <!-- NOTE(review): judging by their names these expand to the requirements and stdio
         sections; confirm against main_macros.xml. -->
    <expand macro="python_requirements" />
    <expand macro="macro_stdio" />
    <!-- Prints the wrapper version token. -->
    <version_command>echo "@VERSION@"</version_command>
    <!-- Cheetah template that assembles the pca.py command line. Only the options that belong
         to the selected PCA flavour, kernel and solver are emitted, and optional numeric
         fields are skipped when they are left empty. Every value is single-quoted. -->
    <command detect_errors="exit_code">
        <![CDATA[
        python '$__tool_directory__/pca.py'
        -i '$infile'
        $header
        -c '$column_selector_options.selected_column_selector_option'
        ## every selector mode except all_columns passes an explicit column list
        #if str($column_selector_options.selected_column_selector_option) != 'all_columns'
            -ci '$column_selector_options.col1'
        #end if
        ## the number of components is defined in every PCA branch and may be empty
        #if str($select_pca_type.number) != ''
            -n '$select_pca_type.number'
        #end if
        -t '$select_pca_type.select_pca_opts'
        #if str($select_pca_type.select_pca_opts) == 'classical'
            -s '$select_pca_type.select_solver_type.svd_solver_opts'
            ## the tolerance field only exists for the arpack solver
            #if str($select_pca_type.select_solver_type.svd_solver_opts) == 'arpack'
                -tol '$select_pca_type.select_solver_type.tolerance'
            #end if
            $select_pca_type.whiten
        #elif str($select_pca_type.select_pca_opts) == 'incremental'
            #if str($select_pca_type.batch_size) != ''
                -b '$select_pca_type.batch_size'
            #end if
            $select_pca_type.whiten
        #elif str($select_pca_type.select_pca_opts) == 'kernel'
            -k '$select_pca_type.select_kernel_opts.kernel_opts'
            ## gamma is offered for the poly, rbf and sigmoid kernels and may be empty
            #if str($select_pca_type.select_kernel_opts.kernel_opts) in ['poly', 'rbf', 'sigmoid']
                #if str($select_pca_type.select_kernel_opts.gamma) != ''
                    -g '$select_pca_type.select_kernel_opts.gamma'
                #end if
            #end if
            ## degree is a poly-only field
            #if str($select_pca_type.select_kernel_opts.kernel_opts) == 'poly'
                -d '$select_pca_type.select_kernel_opts.degree'
            #end if
            ## coef0 is offered for the poly and sigmoid kernels
            #if str($select_pca_type.select_kernel_opts.kernel_opts) in ['poly', 'sigmoid']
                -cf '$select_pca_type.select_kernel_opts.coef0'
            #end if
            -e '$select_pca_type.select_solver_type.eigen_solver_opts'
            ## tolerance and the optional iteration cap only exist for the arpack solver
            #if str($select_pca_type.select_solver_type.eigen_solver_opts) == 'arpack'
                -tol '$select_pca_type.select_solver_type.tolerance'
                #if str($select_pca_type.select_solver_type.max_iter) != ''
                    -mi '$select_pca_type.select_solver_type.max_iter'
                #end if
            #end if
        #end if
        -o '$outfile'
        ]]>
    </command>
    <!-- Tool form. Count-like fields carry a lower bound of 1 and the arpack tolerance a lower
         bound of 0, so values that cannot be valid are rejected in the form instead of
         failing inside the script. -->
    <inputs>
        <!-- Dataset to decompose and whether its first row is a header. -->
        <param name="infile" type="data" format="tabular" label="Input file" />
        <param name="header" type="boolean" label="Exclude Header" truevalue="--header" falsevalue="" help="If present, exclude the header row from the dataset" />
        <!-- Column selection; the selector and its column list are expanded from a shared macro. -->
        <conditional name="column_selector_options">
            <expand macro="samples_column_selector_options" multiple="true" column_option="selected_column_selector_option" col_name="col1" infile="infile" />
        </conditional>
        <!-- PCA flavour; each branch repeats the number of components and adds its own options. -->
        <conditional name="select_pca_type">
            <param name="select_pca_opts" type="select" label="Select PCA Type" help="Choose which flavour of PCA to use">
                <option value="classical" selected="true">Classical PCA</option>
                <option value="incremental">Incremental PCA</option>
                <option value="kernel">Kernel PCA</option>
            </param>
            <when value="classical">
                <param name="number" type="integer" min="1" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept" />
                <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features" />
                <conditional name="select_solver_type">
                    <param name="svd_solver_opts" type="select" label="SVD Solver" help="Method to perform the singular value decomposition">
                        <option value="auto" selected="true">auto</option>
                        <option value="full">full</option>
                        <option value="arpack">arpack</option>
                        <option value="randomized">randomized</option>
                    </param>
                    <when value="arpack">
                        <param name="tolerance" type="float" value="0.0" min="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack" />
                    </when>
                    <when value="auto" />
                    <when value="full" />
                    <when value="randomized" />
                </conditional>
            </when>
            <when value="incremental">
                <param name="number" type="integer" min="1" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept" />
                <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features" />
                <param name="batch_size" type="integer" min="1" optional="true" label="Batch Size" help="The number of samples to use for each batch" />
            </when>
            <when value="kernel">
                <param name="number" type="integer" min="1" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept" />
                <!-- Kernel choice; only poly, sigmoid and rbf expose extra coefficients. -->
                <conditional name="select_kernel_opts">
                    <param name="kernel_opts" type="select" label="Kernel Type" help="Kernel function used by kernel PCA">
                        <option value="linear" selected="true">linear</option>
                        <option value="poly">poly</option>
                        <option value="rbf">rbf</option>
                        <option value="sigmoid">sigmoid</option>
                        <option value="cosine">cosine</option>
                        <option value="precomputed">precomputed</option>
                    </param>
                    <when value="poly">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" />
                        <param name="degree" type="integer" value="3" label="Degree of the polynomial" help="Degree for poly kernels. Ignored by other kernels" />
                        <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels" />
                    </when>
                    <when value="sigmoid">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" />
                        <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels" />
                    </when>
                    <when value="rbf">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" />
                    </when>
                    <when value="linear" />
                    <when value="cosine" />
                    <when value="precomputed" />
                </conditional>
                <!-- Eigen solver choice; only arpack exposes tolerance and an iteration cap. -->
                <conditional name="select_solver_type">
                    <param name="eigen_solver_opts" type="select" label="Eigen Solver" help="Method used to compute the eigen decomposition">
                        <option value="auto" selected="true">auto</option>
                        <option value="dense">dense</option>
                        <option value="arpack">arpack</option>
                    </param>
                    <when value="arpack">
                        <param name="tolerance" type="float" value="0.0" min="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack" />
                        <param name="max_iter" type="integer" min="1" optional="true" label="Maximum Iterations" help="Maximum number of iterations for arpack" />
                    </when>
                    <when value="auto" />
                    <when value="dense" />
                </conditional>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Tabular dataset whose path is handed to pca.py through the -o option. -->
        <data name="outfile" format="tabular" />
    </outputs>
    <!-- Test params are nested in the same conditionals as the inputs, so that names shared by
         several branches (number, tolerance) resolve unambiguously. Booleans are set with
         true/false rather than with their command-line string. -->
    <tests>
        <!-- Classical PCA, arpack solver, columns selected by index. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular" />
            <conditional name="column_selector_options">
                <param name="selected_column_selector_option" value="by_index_number" />
                <param name="col1" value="1,2,4,6,8,5" />
            </conditional>
            <conditional name="select_pca_type">
                <param name="select_pca_opts" value="classical" />
                <param name="number" value="5" />
                <conditional name="select_solver_type">
                    <param name="svd_solver_opts" value="arpack" />
                    <param name="tolerance" value="0.4" />
                </conditional>
            </conditional>
            <output name="outfile" ftype="tabular" file="pca_classical_output.dat" />
        </test>
        <!-- Classical PCA, arpack solver, columns selected by header name. -->
        <test>
            <param name="infile" value="pca_input_with_headers.dat" ftype="tabular" />
            <param name="header" value="true" />
            <conditional name="column_selector_options">
                <param name="selected_column_selector_option" value="by_header_name" />
                <param name="col1" value="col_1,col_2,col_4,col_6,col_8,col_5" />
            </conditional>
            <conditional name="select_pca_type">
                <param name="select_pca_opts" value="classical" />
                <param name="number" value="5" />
                <conditional name="select_solver_type">
                    <param name="svd_solver_opts" value="arpack" />
                    <param name="tolerance" value="0.4" />
                </conditional>
            </conditional>
            <output name="outfile" ftype="tabular" file="pca_classical_header_names_output.dat" />
        </test>
        <!-- Incremental PCA, columns excluded by index. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular" />
            <conditional name="column_selector_options">
                <param name="selected_column_selector_option" value="all_but_by_index_number" />
                <param name="col1" value="8,5" />
            </conditional>
            <conditional name="select_pca_type">
                <param name="select_pca_opts" value="incremental" />
                <param name="number" value="7" />
                <param name="batch_size" value="64" />
            </conditional>
            <output name="outfile" ftype="tabular" file="pca_incremental_output.dat" />
        </test>
        <!-- Incremental PCA, columns excluded by header name. -->
        <test>
            <param name="infile" value="pca_input_with_headers.dat" ftype="tabular" />
            <param name="header" value="true" />
            <conditional name="column_selector_options">
                <param name="selected_column_selector_option" value="all_but_by_header_name" />
                <param name="col1" value="col_8,col_5" />
            </conditional>
            <conditional name="select_pca_type">
                <param name="select_pca_opts" value="incremental" />
                <param name="number" value="7" />
                <param name="batch_size" value="64" />
            </conditional>
            <output name="outfile" ftype="tabular" file="pca_incremental_header_names_output.dat" />
        </test>
        <!-- Kernel PCA, linear kernel, arpack eigen solver; only the output shape is checked. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular" />
            <conditional name="column_selector_options">
                <param name="selected_column_selector_option" value="all_columns" />
            </conditional>
            <conditional name="select_pca_type">
                <param name="select_pca_opts" value="kernel" />
                <param name="number" value="8" />
                <conditional name="select_kernel_opts">
                    <param name="kernel_opts" value="linear" />
                </conditional>
                <conditional name="select_solver_type">
                    <param name="eigen_solver_opts" value="arpack" />
                    <param name="tolerance" value="4.3" />
                    <param name="max_iter" value="8" />
                </conditional>
            </conditional>
            <output name="outfile" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="300" />
                    <has_n_columns n="8" />
                </assert_contents>
            </output>
        </test>
        <!-- Kernel PCA, poly kernel with explicit coefficients; only the output shape is checked. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular" />
            <conditional name="column_selector_options">
                <param name="selected_column_selector_option" value="all_columns" />
            </conditional>
            <conditional name="select_pca_type">
                <param name="select_pca_opts" value="kernel" />
                <param name="number" value="8" />
                <conditional name="select_kernel_opts">
                    <param name="kernel_opts" value="poly" />
                    <param name="gamma" value="0.3" />
                    <param name="degree" value="4" />
                    <param name="coef0" value="1.6" />
                </conditional>
                <conditional name="select_solver_type">
                    <param name="eigen_solver_opts" value="auto" />
                </conditional>
            </conditional>
            <output name="outfile" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="300" />
                    <has_n_columns n="8" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

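This tool takes a tabular input file (one data point per row, each column a variable)
and performs principal component analysis (PCA) with scikit-learn, using the selected
flavour: classical, incremental or kernel PCA. Classical PCA is computed with a Singular
Value Decomposition, while kernel PCA uses an eigen solver. The tool returns a tabular
file with the first PC in the first column, the second PC in the second column, etc.
The output has one column per component kept: all components by default, or as many as
requested in "Number of components".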
        ]]>
    </help>
    <expand macro="sklearn_citation" />
</tool>