Mercurial > repos > bgruening > sklearn_pca

<tool id="sklearn_pca" name="Principal component analysis" version="@VERSION@+galaxy@GALAXY_VERSION@">
    <description>with scikit-learn</description>
    <macros>
        <import>main_macros.xml</import>
        <token name="@GALAXY_VERSION@">0</token>
    </macros>
    <expand macro="python_requirements"/>
    <expand macro="macro_stdio"/>
    <version_command>echo "@VERSION@"</version_command>
    <command detect_errors="exit_code">
        <![CDATA[
        ## Assemble the pca.py invocation from the values chosen in the tool form.
        python '$__tool_directory__/pca.py'
        -i '$infile'
        $header
        -c '$column_selector_options.selected_column_selector_option'
        ## The column list is only passed when a subset of columns was requested.
        #if str($column_selector_options.selected_column_selector_option) != 'all_columns'
            -ci '$column_selector_options.col1'
        #end if
        ## Optional numeric inputs are compared through str() so that a blank
        ## field is reliably detected as an empty string.
        #if str($select_pca_type.number) != ''
            -n '$select_pca_type.number'
        #end if
        #set $pca_type = str($select_pca_type.select_pca_opts)
        -t '$pca_type'
        #if $pca_type == 'classical'
            #set $svd_solver = str($select_pca_type.select_solver_type.svd_solver_opts)
            -s '$svd_solver'
            ## The tolerance input only exists for the arpack solver.
            #if $svd_solver == 'arpack'
                -tol '$select_pca_type.select_solver_type.tolerance'
            #end if
            $select_pca_type.whiten
        #elif $pca_type == 'incremental'
            #if str($select_pca_type.batch_size) != ''
                -b '$select_pca_type.batch_size'
            #end if
            $select_pca_type.whiten
        #elif $pca_type == 'kernel'
            #set $kernel = str($select_pca_type.select_kernel_opts.kernel_opts)
            -k '$kernel'
            ## gamma is offered for poly, rbf and sigmoid; degree for poly only;
            ## coef0 for poly and sigmoid. The kernel test comes first so that
            ## inputs missing from the other kernels are never looked up.
            #if $kernel in ('poly', 'rbf', 'sigmoid') and str($select_pca_type.select_kernel_opts.gamma) != ''
                -g '$select_pca_type.select_kernel_opts.gamma'
            #end if
            #if $kernel == 'poly'
                -d '$select_pca_type.select_kernel_opts.degree'
            #end if
            #if $kernel in ('poly', 'sigmoid')
                -cf '$select_pca_type.select_kernel_opts.coef0'
            #end if
            #set $eigen_solver = str($select_pca_type.select_solver_type.eigen_solver_opts)
            -e '$eigen_solver'
            ## Tolerance and iteration limit only exist for the arpack eigen solver.
            #if $eigen_solver == 'arpack'
                -tol '$select_pca_type.select_solver_type.tolerance'
                #if str($select_pca_type.select_solver_type.max_iter) != ''
                    -mi '$select_pca_type.select_solver_type.max_iter'
                #end if
            #end if
        #end if
        -o '$outfile'
        ]]>
    </command>
    <inputs>
        <!-- Dataset to decompose and whether its first row is a header. -->
        <param name="infile" type="data" format="tabular" label="Input file"/>
        <param name="header" type="boolean" label="Exclude Header" truevalue="--header" falsevalue="" help="If present, exclude the header row from the dataset"/>
        <!-- Column selection; the selector and its col1 input are supplied by the imported macro. -->
        <conditional name="column_selector_options">
            <expand macro="samples_column_selector_options" multiple="true" column_option="selected_column_selector_option" col_name="col1" infile="infile"/>
        </conditional>
        <!-- PCA flavour; every branch defines "number", which the command reads regardless of the flavour. -->
        <conditional name="select_pca_type">
            <param name="select_pca_opts" type="select" label="Select PCA Type" help="Choose which flavour of PCA to use">
                <option value="classical" selected="true">Classical PCA</option>
                <option value="incremental">Incremental PCA</option>
                <option value="kernel">Kernel PCA</option>
            </param>
            <when value="classical">
                <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/>
                <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features"/>
                <conditional name="select_solver_type">
                    <param name="svd_solver_opts" type="select" label="SVD Solver" help="Method to perform the singular value decomposition">
                        <option value="auto" selected="true">auto</option>
                        <option value="full">full</option>
                        <option value="arpack">arpack</option>
                        <option value="randomized">randomized</option>
                    </param>
                    <!-- Only arpack exposes an extra setting. -->
                    <when value="arpack">
                        <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack"/>
                    </when>
                    <when value="auto"/>
                    <when value="full"/>
                    <when value="randomized"/>
                </conditional>
            </when>
            <when value="incremental">
                <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/>
                <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features"/>
                <param name="batch_size" type="integer" optional="true" label="Batch Size" help="The number of samples to use for each batch"/>
            </when>
            <when value="kernel">
                <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/>
                <conditional name="select_kernel_opts">
                    <param name="kernel_opts" type="select" label="Kernel Type" help="Kernel to use for Kernel PCA. Additional settings are shown for the poly, rbf and sigmoid kernels">
                        <option value="linear" selected="true">linear</option>
                        <option value="poly">poly</option>
                        <option value="rbf">rbf</option>
                        <option value="sigmoid">sigmoid</option>
                        <option value="cosine">cosine</option>
                        <option value="precomputed">precomputed</option>
                    </param>
                    <when value="poly">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/>
                        <param name="degree" type="integer" value="3" label="Degree of the polynomial" help="Degree for poly kernels. Ignored by other kernels"/>
                        <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels"/>
                    </when>
                    <when value="sigmoid">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/>
                        <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels"/>
                    </when>
                    <when value="rbf">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/>
                    </when>
                    <!-- These kernels take no extra settings in this form. -->
                    <when value="linear"/>
                    <when value="cosine"/>
                    <when value="precomputed"/>
                </conditional>
                <conditional name="select_solver_type">
                    <param name="eigen_solver_opts" type="select" label="Eigen Solver" help="Method to compute the eigenvalue decomposition. Additional settings are shown for arpack">
                        <option value="auto" selected="true">auto</option>
                        <option value="dense">dense</option>
                        <option value="arpack">arpack</option>
                    </param>
                    <when value="arpack">
                        <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack"/>
                        <param name="max_iter" type="integer" optional="true" label="Maximum Iterations" help="Maximum number of iterations for arpack"/>
                    </when>
                    <when value="auto"/>
                    <when value="dense"/>
                </conditional>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Tabular dataset whose path is handed to pca.py through the -o argument. -->
        <data name="outfile" format="tabular"/>
    </outputs>
    <tests>
        <!-- Classical PCA, arpack solver, columns chosen by index. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular"/>
            <param name="selected_column_selector_option" value="by_index_number"/>
            <param name="col1" value="1,2,4,6,8,5"/>
            <param name="number" value="5"/>
            <param name="select_pca_opts" value="classical"/>
            <param name="svd_solver_opts" value="arpack"/>
            <param name="tolerance" value="0.4"/>
            <output name="outfile" ftype="tabular" file="pca_classical_output.dat"/>
        </test>
        <!-- Classical PCA, arpack solver, columns chosen by header name. -->
        <test>
            <param name="infile" value="pca_input_with_headers.dat" ftype="tabular"/>
            <!-- Boolean inputs are set with true/false rather than their truevalue string. -->
            <param name="header" value="true"/>
            <param name="selected_column_selector_option" value="by_header_name"/>
            <param name="col1" value="col_1,col_2,col_4,col_6,col_8,col_5"/>
            <param name="number" value="5"/>
            <param name="select_pca_opts" value="classical"/>
            <param name="svd_solver_opts" value="arpack"/>
            <param name="tolerance" value="0.4"/>
            <output name="outfile" ftype="tabular" file="pca_classical_header_names_output.dat"/>
        </test>
        <!-- Incremental PCA, all columns except the listed indices. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular"/>
            <param name="selected_column_selector_option" value="all_but_by_index_number"/>
            <param name="col1" value="8,5"/>
            <param name="number" value="7"/>
            <param name="select_pca_opts" value="incremental"/>
            <param name="batch_size" value="64"/>
            <output name="outfile" ftype="tabular" file="pca_incremental_output.dat"/>
        </test>
        <!-- Incremental PCA, all columns except the listed header names. -->
        <test>
            <param name="infile" value="pca_input_with_headers.dat" ftype="tabular"/>
            <param name="header" value="true"/>
            <param name="selected_column_selector_option" value="all_but_by_header_name"/>
            <param name="col1" value="col_8,col_5"/>
            <param name="number" value="7"/>
            <param name="select_pca_opts" value="incremental"/>
            <param name="batch_size" value="64"/>
            <output name="outfile" ftype="tabular" file="pca_incremental_header_names_output.dat"/>
        </test>
        <!-- Kernel PCA, linear kernel, arpack eigen solver; only the output shape is checked. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular"/>
            <param name="selected_column_selector_option" value="all_columns"/>
            <param name="number" value="8"/>
            <param name="select_pca_opts" value="kernel"/>
            <param name="kernel_opts" value="linear"/>
            <param name="eigen_solver_opts" value="arpack"/>
            <param name="tolerance" value="4.3"/>
            <param name="max_iter" value="8"/>
            <output name="outfile" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="300"/>
                    <has_n_columns n="8"/>
                </assert_contents>
            </output>
        </test>
        <!-- Kernel PCA, poly kernel with explicit gamma, degree and coef0; only the output shape is checked. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular"/>
            <param name="selected_column_selector_option" value="all_columns"/>
            <param name="number" value="8"/>
            <param name="select_pca_opts" value="kernel"/>
            <param name="kernel_opts" value="poly"/>
            <param name="gamma" value="0.3"/>
            <param name="degree" value="4"/>
            <param name="coef0" value="1.6"/>
            <param name="eigen_solver_opts" value="auto"/>
            <output name="outfile" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="300"/>
                    <has_n_columns n="8"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

This tool takes a tabular input file (one data point per row, each column a variable)
and performs PCA using Singular Value Decomposition, returning a tabular file with the
first PC in the first column, second PC in the second column, etc. The number of output
columns equals the number of components kept: all of them unless *Number of components* is set.
        ]]>
    </help>
    <expand macro="sklearn_citation"/>
</tool>
author	bgruening
date	Fri, 02 Oct 2020 08:45:21 +0000
parents
children	132805688fa3