view lasso_enet_var_select.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children
line wrap: on
line source

<tool id="secimtools_lasso_enet_var_select" name="LASSO/Elastic Net Variable Selection," version="@WRAPPER_VERSION@">
    <description>for feature selection.</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Exit-code handling: every non-zero exit code (1 and above) is reported
         at warning level with the description "RuntimeWarning".
         NOTE(review): this also covers genuine script failures, which are then
         surfaced as warnings too; confirm that this is intended. -->
    <stdio>
        <exit_code description="RuntimeWarning"
                   level="warning"
                   range="1:"/>
    </stdio>
    <!-- Runs lasso_enet_var_select.py with the two input datasets, the column
         names and shrinkage parameter supplied by the user, and the paths of the
         three output datasets.
         Every substituted value is single-quoted so that a path or a text value
         containing spaces or shell metacharacters reaches the script as exactly
         one argument instead of being split or interpreted by the shell. -->
    <command><![CDATA[
lasso_enet_var_select.py
--input '$input'
--design '$design'
--ID '$uniqID'
--group '$group'
--alpha '$alpha'
--coefficients '$coefficients'
--flags '$flags'
--plots '$plots'
    ]]></command>
    <inputs>
        <!-- Wide-format data matrix and the design file describing its samples. -->
        <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab separated see TIP below."/>
        <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
        <!-- Column names: the feature identifier column of the wide dataset and
             the grouping column of the design file. -->
        <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that has unique identifiers."/>
        <param name="group" type="text" size="30" label="Group/Treatment" help="Name of the column in your design file that contains group classifications."/>
        <!-- Shrinkage parameter. Declared as a float bounded to [0, 1], the range
             stated in the help section, so that non-numeric or out-of-range
             values are rejected in the form instead of being passed on to the
             script. -->
        <param name="alpha" type="float" value="0.5" min="0" max="1" label="shrinkage parameter α" help="Shrinkage parameter α specifies the penalty for the LASSO/Elastic Net procedure. Must be in the range [0, 1]. Default 0.5"/>
    </inputs>
    <!-- Three output datasets: two tabular files (coefficients and flags) and
         one PDF of plots. Each label combines the tool name, the input
         description and the kind of output. -->
    <outputs>
        <data name="coefficients"
              format="tabular"
              label="${tool.name} on ${on_string}: Coefficients"/>
        <data name="flags"
              format="tabular"
              label="${tool.name} on ${on_string}: Flags"/>
        <data name="plots"
              format="pdf"
              label="${tool.name} on ${on_string}: Plots"/>
    </outputs>
    <tests>
        <!-- Single test on the ST000006 example data with alpha set to 0.5.
             The two tabular outputs are compared against reference files; the
             PDF is compared by size only, with a tolerance of 10000. -->
        <test>
            <param name="input" value="ST000006_data.tsv"/>
            <param name="design" value="ST000006_design.tsv"/>
            <param name="uniqID" value="Retention_Index"/>
            <param name="group" value="White_wine_type_and_source"/>
            <param name="alpha" value="0.5"/>
            <output name="coefficients" file="ST000006_lasso_enet_var_select_coefficients.tsv"/>
            <output name="flags" file="ST000006_lasso_enet_var_select_flags.tsv"/>
            <output name="plots"
                    file="ST000006_lasso_enet_var_select_plots.pdf"
                    compare="sim_size"
                    delta="10000"/>
        </test>
    </tests>
    <help><![CDATA[

@TIP_AND_WARNING@

**Tool Description**

The tool selects (identifies) features that are different between pairs of treatment groups.
The selection is performed based on the logistic regression with Elastic Net shrinkage (with LASSO being a special case).
The selection method is defined by shrinkage parameter α.
Variable selection can be performed for any value of α in the range [0, 1], where α = 1 corresponds to the fewest variables and the strictest selection criterion (LASSO) and α = 0 corresponds to shrinkage without variable selection (Ridge regression). The default value is α = 0.5.
The best subset of variables for a given α is selected by a cross-validation procedure.
Lambda is a penalty parameter determined during the cross-validation procedure.

More details about the Elastic Net and LASSO methods can be found in the reference below:

Zou, H., and Hastie, T. (2005). Regularization and variable selection via the elastic net. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 67(2), 301-320.

Tibshirani, Robert. "Regression shrinkage and selection via the lasso." Journal of the Royal Statistical Society. Series B (Methodological) (1996): 267-288.

Friedman, Jerome, Trevor Hastie, and Rob Tibshirani. "Regularization paths for generalized linear models via coordinate descent." Journal of statistical software 33, no. 1 (2010): 1.

--------------------------------------------------------------------------------

**Note**

- This tool currently treats all variables as continuous numeric
  variables. Running the tool on categorical variables might result in
  incorrect results.
- Rows containing non-numeric (or missing) data will be excluded.

--------------------------------------------------------------------------------

**Input**

    - Two input datasets are required.

@WIDE@

**NOTE:** The sample IDs must match the sample IDs in the Design File (below).
Extra columns will automatically be ignored.

@METADATA@

@UNIQID@

@GROUP@

**shrinkage parameter α**

    - Specifies the penalty for the LASSO/Elastic Net procedure.  Default = 0.5

--------------------------------------------------------------------------------

**Output**

This tool outputs three files:

(1) A TSV file containing the values of the coefficients (including zeroes) for each feature generated by the tool for each pair of comparisons (in columns).  These coefficients are produced from the transformed data (as part of the LASSO/EN method) and should be interpreted with caution.

(2) A TSV file containing the corresponding flags for each feature where the value “1” corresponds to features selected by the method.

(3) A PDF file containing graphs for each pairwise comparison between the groups.  The first graph displays the behavior of the coefficients based on the value of the penalty parameter lambda and the shrinkage parameter α.  The second graph provides details of the cross-validation procedure used for detection of the optimal penalty and for feature selection.

    ]]></help>
    <expand macro="citations"/>
</tool>