Mercurial > repos > bgruening > sklearn_pca
view pca.xml @ 0:2d7016b3ae92 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2afb24f3c81d625312186750a714d702363012b5"
author | bgruening |
---|---|
date | Fri, 02 Oct 2020 08:45:21 +0000 |
parents | |
children | 132805688fa3 |
line wrap: on
line source
<tool id="sklearn_pca" name="Principal component analysis" version="@VERSION@+galaxy@GALAXY_VERSION@">
    <description>with scikit-learn</description>
    <!--
        Galaxy wrapper around pca.py (expected next to this file in the tool directory).
        The user picks one of three PCA flavours (classical, incremental, kernel); each
        flavour exposes its own options through the "select_pca_type" conditional and the
        command template below turns the chosen options into pca.py command-line flags.
        @VERSION@ and the python_requirements, macro_stdio, samples_column_selector_options
        and sklearn_citation macros are not defined here; presumably they come from
        main_macros.xml.
    -->
    <macros>
        <import>main_macros.xml</import>
        <token name="@GALAXY_VERSION@">0</token>
    </macros>
    <expand macro="python_requirements"/>
    <expand macro="macro_stdio"/>
    <version_command>echo "@VERSION@"</version_command>
    <command detect_errors="exit_code">
        <![CDATA[
        python '$__tool_directory__/pca.py'
            -i '$infile'
            $header
            -c '$column_selector_options.selected_column_selector_option'
            ## Column list is only passed when a subset of columns was requested.
            #if $column_selector_options.selected_column_selector_option != 'all_columns'
                -ci '$column_selector_options.col1'
            #end if
            ## Number of components is optional in every PCA flavour.
            #if $select_pca_type.number != ''
                -n '$select_pca_type.number'
            #end if
            -t '$select_pca_type.select_pca_opts'
            #if $select_pca_type.select_pca_opts == 'classical'
                -s '$select_pca_type.select_solver_type.svd_solver_opts'
                ## Tolerance is only exposed for the arpack solver.
                #if $select_pca_type.select_solver_type.svd_solver_opts == 'arpack'
                    -tol '$select_pca_type.select_solver_type.tolerance'
                #end if
                $select_pca_type.whiten
            #elif $select_pca_type.select_pca_opts == 'incremental'
                #if $select_pca_type.batch_size != ''
                    -b '$select_pca_type.batch_size'
                #end if
                $select_pca_type.whiten
            #elif $select_pca_type.select_pca_opts == 'kernel'
                -k '$select_pca_type.select_kernel_opts.kernel_opts'
                ## Only poly, rbf and sigmoid expose extra kernel parameters.
                #if $select_pca_type.select_kernel_opts.kernel_opts == 'poly'
                    #if $select_pca_type.select_kernel_opts.gamma != ''
                        -g '$select_pca_type.select_kernel_opts.gamma'
                    #end if
                    -d '$select_pca_type.select_kernel_opts.degree'
                    -cf '$select_pca_type.select_kernel_opts.coef0'
                #elif $select_pca_type.select_kernel_opts.kernel_opts == 'rbf'
                    #if $select_pca_type.select_kernel_opts.gamma != ''
                        -g '$select_pca_type.select_kernel_opts.gamma'
                    #end if
                #elif $select_pca_type.select_kernel_opts.kernel_opts == 'sigmoid'
                    #if $select_pca_type.select_kernel_opts.gamma != ''
                        -g '$select_pca_type.select_kernel_opts.gamma'
                    #end if
                    -cf '$select_pca_type.select_kernel_opts.coef0'
                #end if
                -e '$select_pca_type.select_solver_type.eigen_solver_opts'
                ## Tolerance and iteration limit are only exposed for the arpack solver.
                #if $select_pca_type.select_solver_type.eigen_solver_opts == 'arpack'
                    -tol '$select_pca_type.select_solver_type.tolerance'
                    #if $select_pca_type.select_solver_type.max_iter != ''
                        -mi '$select_pca_type.select_solver_type.max_iter'
                    #end if
                #end if
            #end if
            -o '$outfile'
        ]]>
    </command>
    <inputs>
        <param name="infile" type="data" format="tabular" label="Input file"/>
        <!-- Boolean flags expand to their truevalue on the command line, or to nothing when unchecked. -->
        <param name="header" type="boolean" label="Exclude Header" truevalue="--header" falsevalue="" help="If present, exclude the header row from the dataset"/>
        <!-- Column selection widgets are supplied by a macro; it is parameterised to create
             "selected_column_selector_option" and "col1", which the command template reads. -->
        <conditional name="column_selector_options">
            <expand macro="samples_column_selector_options" multiple="true" column_option="selected_column_selector_option" col_name="col1" infile="infile"/>
        </conditional>
        <conditional name="select_pca_type">
            <param name="select_pca_opts" type="select" label="Select PCA Type" help="Choose which flavour of PCA to use">
                <option value="classical" selected="true">Classical PCA</option>
                <option value="incremental">Incremental PCA</option>
                <option value="kernel">Kernel PCA</option>
            </param>
            <!-- Classical PCA: components, whitening and choice of SVD solver. -->
            <when value="classical">
                <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/>
                <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features"/>
                <conditional name="select_solver_type">
                    <param name="svd_solver_opts" type="select" label="SVD Solver" help="Method to perform the singular value decomposition">
                        <option value="auto" selected="true">auto</option>
                        <option value="full">full</option>
                        <option value="arpack">arpack</option>
                        <option value="randomized">randomized</option>
                    </param>
                    <when value="arpack">
                        <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack"/>
                    </when>
                    <when value="auto"/>
                    <when value="full"/>
                    <when value="randomized"/>
                </conditional>
            </when>
            <!-- Incremental PCA: components, whitening and batch size. -->
            <when value="incremental">
                <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/>
                <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features"/>
                <param name="batch_size" type="integer" optional="true" label="Batch Size" help="The number of samples to use for each batch"/>
            </when>
            <!-- Kernel PCA: components, kernel (with kernel-specific parameters) and eigen solver. -->
            <when value="kernel">
                <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/>
                <conditional name="select_kernel_opts">
                    <param name="kernel_opts" type="select" label="Kernel Type">
                        <option value="linear" selected="true">linear</option>
                        <option value="poly">poly</option>
                        <option value="rbf">rbf</option>
                        <option value="sigmoid">sigmoid</option>
                        <option value="cosine">cosine</option>
                        <option value="precomputed">precomputed</option>
                    </param>
                    <when value="poly">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/>
                        <param name="degree" type="integer" value="3" label="Degree of the polynomial" help="Degree for poly kernels. Ignored by other kernels"/>
                        <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels"/>
                    </when>
                    <when value="sigmoid">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/>
                        <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels"/>
                    </when>
                    <when value="rbf">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/>
                    </when>
                    <when value="linear"/>
                    <when value="cosine"/>
                    <when value="precomputed"/>
                </conditional>
                <conditional name="select_solver_type">
                    <param name="eigen_solver_opts" type="select" label="Eigen Solver">
                        <option value="auto" selected="true">auto</option>
                        <option value="dense">dense</option>
                        <option value="arpack">arpack</option>
                    </param>
                    <when value="arpack">
                        <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack"/>
                        <param name="max_iter" type="integer" optional="true" label="Maximum Iterations" help="Maximum number of iterations for arpack"/>
                    </when>
                    <when value="auto"/>
                    <when value="dense"/>
                </conditional>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data format="tabular" name="outfile"/>
    </outputs>
    <tests>
        <!-- Classical PCA, arpack solver, columns selected by index. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular"/>
            <param name="selected_column_selector_option" value="by_index_number"/>
            <param name="col1" value="1,2,4,6,8,5"/>
            <param name="number" value="5"/>
            <param name="select_pca_opts" value="classical"/>
            <param name="svd_solver_opts" value="arpack"/>
            <param name="tolerance" value="0.4"/>
            <output name="outfile" ftype="tabular" file="pca_classical_output.dat"/>
        </test>
        <!-- Classical PCA, arpack solver, header row present, columns selected by header name. -->
        <test>
            <param name="infile" value="pca_input_with_headers.dat" ftype="tabular"/>
            <param name="header" value="--header"/>
            <param name="selected_column_selector_option" value="by_header_name"/>
            <param name="col1" value="col_1,col_2,col_4,col_6,col_8,col_5"/>
            <param name="number" value="5"/>
            <param name="select_pca_opts" value="classical"/>
            <param name="svd_solver_opts" value="arpack"/>
            <param name="tolerance" value="0.4"/>
            <output name="outfile" ftype="tabular" file="pca_classical_header_names_output.dat"/>
        </test>
        <!-- Incremental PCA, columns excluded by index. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular"/>
            <param name="selected_column_selector_option" value="all_but_by_index_number"/>
            <param name="col1" value="8,5"/>
            <param name="number" value="7"/>
            <param name="select_pca_opts" value="incremental"/>
            <param name="batch_size" value="64"/>
            <output name="outfile" ftype="tabular" file="pca_incremental_output.dat"/>
        </test>
        <!-- Incremental PCA, header row present, columns excluded by header name. -->
        <test>
            <param name="infile" value="pca_input_with_headers.dat" ftype="tabular"/>
            <param name="header" value="--header"/>
            <param name="selected_column_selector_option" value="all_but_by_header_name"/>
            <param name="col1" value="col_8,col_5"/>
            <param name="number" value="7"/>
            <param name="select_pca_opts" value="incremental"/>
            <param name="batch_size" value="64"/>
            <output name="outfile" ftype="tabular" file="pca_incremental_header_names_output.dat"/>
        </test>
        <!-- Kernel PCA, linear kernel, arpack eigen solver; only the output dimensions are asserted. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular"/>
            <param name="selected_column_selector_option" value="all_columns"/>
            <param name="number" value="8"/>
            <param name="select_pca_opts" value="kernel"/>
            <param name="kernel_opts" value="linear"/>
            <param name="eigen_solver_opts" value="arpack"/>
            <param name="tolerance" value="4.3"/>
            <param name="max_iter" value="8"/>
            <output name="outfile" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="300"/>
                    <has_n_columns n="8"/>
                </assert_contents>
            </output>
        </test>
        <!-- Kernel PCA, polynomial kernel, automatic eigen solver; only the output dimensions are asserted. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular"/>
            <param name="selected_column_selector_option" value="all_columns"/>
            <param name="number" value="8"/>
            <param name="select_pca_opts" value="kernel"/>
            <param name="kernel_opts" value="poly"/>
            <param name="gamma" value="0.3"/>
            <param name="degree" value="4"/>
            <param name="coef0" value="1.6"/>
            <param name="eigen_solver_opts" value="auto"/>
            <output name="outfile" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="300"/>
                    <has_n_columns n="8"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

This tool takes a tabular input file (one data point per row, each column a variable) and performs PCA using Singular Value Decomposition, returning an equally sized tabular file with the first PC in the first column, second PC in the second column, etc.
    ]]>
    </help>
    <expand macro="sklearn_citation"/>
</tool>