Mercurial > repos > bgruening > sklearn_pca
diff pca.xml @ 0:2d7016b3ae92 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2afb24f3c81d625312186750a714d702363012b5"
author | bgruening |
---|---|
date | Fri, 02 Oct 2020 08:45:21 +0000 |
parents | |
children | 132805688fa3 |
line wrap: on
line diff
<!--
    Galaxy tool wrapper that runs pca.py (shipped in the tool directory) on a tabular dataset.
    This definition was first added as a new, 230-line file named pca.xml on
    Fri Oct 02 2020 (08:45:21 +0000).
    @VERSION@ and the python_requirements, macro_stdio, samples_column_selector_options and
    sklearn_citation macros are imported from main_macros.xml and are not defined here.
-->
<tool id="sklearn_pca" name="Principal component analysis" version="@VERSION@+galaxy@GALAXY_VERSION@">
    <description>with scikit-learn</description>
    <macros>
        <import>main_macros.xml</import>
        <token name="@GALAXY_VERSION@">0</token>
    </macros>
    <expand macro="python_requirements"/>
    <expand macro="macro_stdio"/>
    <version_command>echo "@VERSION@"</version_command>
    <!--
        Command line assembly (Cheetah template):
          * -i / -o            input and output datasets
          * $header            expands to the boolean's truevalue flag, or to nothing
          * -c / -ci           column selection mode and, unless all columns are used, the columns
          * -n                 number of components, only when the optional integer is set
          * -t                 PCA flavour; the remaining flags depend on the chosen flavour
        Every value-taking argument is single-quoted, including the numeric -tol and -mi values,
        so the quoting style is uniform across the template.
    -->
    <command detect_errors="exit_code">
        <![CDATA[
        python '$__tool_directory__/pca.py'
        -i '$infile'
        $header
        -c '$column_selector_options.selected_column_selector_option'
        #if $column_selector_options.selected_column_selector_option != 'all_columns'
            -ci '$column_selector_options.col1'
        #end if
        #if $select_pca_type.number != ''
            -n '$select_pca_type.number'
        #end if
        -t '$select_pca_type.select_pca_opts'
        #if $select_pca_type.select_pca_opts == 'classical'
            -s '$select_pca_type.select_solver_type.svd_solver_opts'
            #if $select_pca_type.select_solver_type.svd_solver_opts == 'arpack'
                -tol '$select_pca_type.select_solver_type.tolerance'
            #end if
            $select_pca_type.whiten
        #elif $select_pca_type.select_pca_opts == 'incremental'
            #if $select_pca_type.batch_size != ''
                -b '$select_pca_type.batch_size'
            #end if
            $select_pca_type.whiten
        #elif $select_pca_type.select_pca_opts == 'kernel'
            -k '$select_pca_type.select_kernel_opts.kernel_opts'
            #if $select_pca_type.select_kernel_opts.kernel_opts == 'poly'
                #if $select_pca_type.select_kernel_opts.gamma != ''
                    -g '$select_pca_type.select_kernel_opts.gamma'
                #end if
                -d '$select_pca_type.select_kernel_opts.degree'
                -cf '$select_pca_type.select_kernel_opts.coef0'
            #elif $select_pca_type.select_kernel_opts.kernel_opts == 'rbf'
                #if $select_pca_type.select_kernel_opts.gamma != ''
                    -g '$select_pca_type.select_kernel_opts.gamma'
                #end if
            #elif $select_pca_type.select_kernel_opts.kernel_opts == 'sigmoid'
                #if $select_pca_type.select_kernel_opts.gamma != ''
                    -g '$select_pca_type.select_kernel_opts.gamma'
                #end if
                -cf '$select_pca_type.select_kernel_opts.coef0'
            #end if
            -e '$select_pca_type.select_solver_type.eigen_solver_opts'
            #if $select_pca_type.select_solver_type.eigen_solver_opts == 'arpack'
                -tol '$select_pca_type.select_solver_type.tolerance'
                #if $select_pca_type.select_solver_type.max_iter != ''
                    -mi '$select_pca_type.select_solver_type.max_iter'
                #end if
            #end if
        #end if
        -o '$outfile'
        ]]>
    </command>
    <inputs>
        <param name="infile" type="data" format="tabular" label="Input file"/>
        <param name="header" type="boolean" label="Exclude Header" truevalue="--header" falsevalue="" help="If present, exclude the header row from the dataset"/>
        <!-- The select and column parameters of this conditional come from the expanded macro. -->
        <conditional name="column_selector_options">
            <expand macro="samples_column_selector_options" multiple="true" column_option="selected_column_selector_option" col_name="col1" infile="infile"/>
        </conditional>
        <!-- Each PCA flavour declares its own "number" parameter, so the command can read it in every branch. -->
        <conditional name="select_pca_type">
            <param name="select_pca_opts" type="select" label="Select PCA Type" help="Choose which flavour of PCA to use">
                <option value="classical" selected="true">Classical PCA</option>
                <option value="incremental">Incremental PCA</option>
                <option value="kernel">Kernel PCA</option>
            </param>
            <when value="classical">
                <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/>
                <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features"/>
                <conditional name="select_solver_type">
                    <param name="svd_solver_opts" type="select" label="SVD Solver" help="Method to perform the singular value decomposition">
                        <option value="auto" selected="true">auto</option>
                        <option value="full">full</option>
                        <option value="arpack">arpack</option>
                        <option value="randomized">randomized</option>
                    </param>
                    <!-- Only the arpack solver exposes a tolerance; the other solvers take no extra options here. -->
                    <when value="arpack">
                        <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack"/>
                    </when>
                    <when value="auto"/>
                    <when value="full"/>
                    <when value="randomized"/>
                </conditional>
            </when>
            <when value="incremental">
                <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/>
                <!-- Help text mirrors the classical branch so both whiten switches are described identically. -->
                <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features"/>
                <param name="batch_size" type="integer" optional="true" label="Batch Size" help="The number of samples to use for each batch"/>
            </when>
            <when value="kernel">
                <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/>
                <conditional name="select_kernel_opts">
                    <param name="kernel_opts" type="select" label="Kernel Type">
                        <option value="linear" selected="true">linear</option>
                        <option value="poly">poly</option>
                        <option value="rbf">rbf</option>
                        <option value="sigmoid">sigmoid</option>
                        <option value="cosine">cosine</option>
                        <option value="precomputed">precomputed</option>
                    </param>
                    <!-- gamma is offered for poly, sigmoid and rbf; coef0 for poly and sigmoid; degree for poly only. -->
                    <when value="poly">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/>
                        <param name="degree" type="integer" value="3" label="Degree of the polynomial" help="Degree for poly kernels. Ignored by other kernels"/>
                        <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels"/>
                    </when>
                    <when value="sigmoid">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/>
                        <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels"/>
                    </when>
                    <when value="rbf">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/>
                    </when>
                    <when value="linear"/>
                    <when value="cosine"/>
                    <when value="precomputed"/>
                </conditional>
                <conditional name="select_solver_type">
                    <param name="eigen_solver_opts" type="select" label="Eigen Solver">
                        <option value="auto" selected="true">auto</option>
                        <option value="dense">dense</option>
                        <option value="arpack">arpack</option>
                    </param>
                    <when value="arpack">
                        <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack"/>
                        <param name="max_iter" type="integer" optional="true" label="Maximum Iterations" help="Maximum number of iterations for arpack"/>
                    </when>
                    <when value="auto"/>
                    <when value="dense"/>
                </conditional>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data format="tabular" name="outfile"/>
    </outputs>
    <tests>
        <!-- Classical PCA, arpack solver, columns chosen by index. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular"/>
            <param name="selected_column_selector_option" value="by_index_number"/>
            <param name="col1" value="1,2,4,6,8,5"/>
            <param name="number" value="5"/>
            <param name="select_pca_opts" value="classical"/>
            <param name="svd_solver_opts" value="arpack"/>
            <param name="tolerance" value="0.4"/>
            <output name="outfile" ftype="tabular" file="pca_classical_output.dat"/>
        </test>
        <!--
            Classical PCA, arpack solver, columns chosen by header name.
            NOTE(review): the boolean "header" is set through its truevalue string rather than
            true/false; confirm the targeted Galaxy release interprets this as enabled.
        -->
        <test>
            <param name="infile" value="pca_input_with_headers.dat" ftype="tabular"/>
            <param name="header" value="--header"/>
            <param name="selected_column_selector_option" value="by_header_name"/>
            <param name="col1" value="col_1,col_2,col_4,col_6,col_8,col_5"/>
            <param name="number" value="5"/>
            <param name="select_pca_opts" value="classical"/>
            <param name="svd_solver_opts" value="arpack"/>
            <param name="tolerance" value="0.4"/>
            <output name="outfile" ftype="tabular" file="pca_classical_header_names_output.dat"/>
        </test>
        <!-- Incremental PCA, all columns except the listed indices. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular"/>
            <param name="selected_column_selector_option" value="all_but_by_index_number"/>
            <param name="col1" value="8,5"/>
            <param name="number" value="7"/>
            <param name="select_pca_opts" value="incremental"/>
            <param name="batch_size" value="64"/>
            <output name="outfile" ftype="tabular" file="pca_incremental_output.dat"/>
        </test>
        <!-- Incremental PCA, all columns except the listed header names. -->
        <test>
            <param name="infile" value="pca_input_with_headers.dat" ftype="tabular"/>
            <param name="header" value="--header"/>
            <param name="selected_column_selector_option" value="all_but_by_header_name"/>
            <param name="col1" value="col_8,col_5"/>
            <param name="number" value="7"/>
            <param name="select_pca_opts" value="incremental"/>
            <param name="batch_size" value="64"/>
            <output name="outfile" ftype="tabular" file="pca_incremental_header_names_output.dat"/>
        </test>
        <!-- Kernel PCA, linear kernel, arpack eigen solver; only the output shape is asserted. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular"/>
            <param name="selected_column_selector_option" value="all_columns"/>
            <param name="number" value="8"/>
            <param name="select_pca_opts" value="kernel"/>
            <param name="kernel_opts" value="linear"/>
            <param name="eigen_solver_opts" value="arpack"/>
            <param name="tolerance" value="4.3"/>
            <param name="max_iter" value="8"/>
            <output name="outfile" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="300"/>
                    <has_n_columns n="8"/>
                </assert_contents>
            </output>
        </test>
        <!-- Kernel PCA, polynomial kernel, automatic eigen solver; only the output shape is asserted. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular"/>
            <param name="selected_column_selector_option" value="all_columns"/>
            <param name="number" value="8"/>
            <param name="select_pca_opts" value="kernel"/>
            <param name="kernel_opts" value="poly"/>
            <param name="gamma" value="0.3"/>
            <param name="degree" value="4"/>
            <param name="coef0" value="1.6"/>
            <param name="eigen_solver_opts" value="auto"/>
            <output name="outfile" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="300"/>
                    <has_n_columns n="8"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

This tool takes a tabular input file (one data point per row, each column a variable)
and performs PCA using Singular Value Decomposition, returning an equally sized tabular
file with the first PC in the first column, second PC in the second column, etc.
    ]]>
    </help>
    <expand macro="sklearn_citation"/>
</tool>