annotate lasso_enet_var_select.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
1 <tool id="secimtools_lasso_enet_var_select" name="LASSO/Elastic Net Variable Selection," version="@WRAPPER_VERSION@">
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
2 <description>for feature selection.</description>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
3 <macros>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
4 <import>macros.xml</import>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
5 </macros>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
6 <expand macro="requirements" />
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
7 <stdio>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
8 <exit_code range="1:" level="warning" description="RuntimeWarning"/>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
9 </stdio>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
10 <command><![CDATA[
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
11 lasso_enet_var_select.py
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
12 --input $input
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
13 --design $design
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
14 --ID $uniqID
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
15 --group $group
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
16 --alpha $alpha
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
17 --coefficients $coefficients
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
18 --flags $flags
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
19 --plots $plots
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
20 ]]></command>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
21 <inputs>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
22 <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab separated see TIP below."/>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
23 <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
24 <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that has unique identifiers."/>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
25 <param name="group" type="text" size="30" label="Group/Treatment." help="Name of the column in your design file that contains group classifications."/>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
26 <param name="alpha" type="text" value=".5" size="30" label="shrinkage parameter α" help="Shrinkage parameter α specifies the penalty for the LASSO/Elastic Net procedure. Default 0.5"/>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
27 </inputs>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
28 <outputs>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
29 <data format="tabular" name="coefficients" label="${tool.name} on ${on_string}: Coefficients"/>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
30 <data format="tabular" name="flags" label="${tool.name} on ${on_string}: Flags"/>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
31 <data format="pdf" name="plots" label="${tool.name} on ${on_string}: Plots"/>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
32 </outputs>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
33 <tests>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
34 <test>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
35 <param name="input" value="ST000006_data.tsv"/>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
36 <param name="design" value="ST000006_design.tsv"/>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
37 <param name="uniqID" value="Retention_Index" />
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
38 <param name="group" value="White_wine_type_and_source" />
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
39 <param name="alpha" value="0.5" />
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
40 <output name="coefficients" file="ST000006_lasso_enet_var_select_coefficients.tsv" />
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
41 <output name="flags" file="ST000006_lasso_enet_var_select_flags.tsv" />
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
42 <output name="plots" file="ST000006_lasso_enet_var_select_plots.pdf" compare="sim_size" delta="10000" />
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
43 </test>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
44 </tests>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
45 <help><![CDATA[
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
46
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
47 @TIP_AND_WARNING@
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
48
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
49 **Tool Description**
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
50
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
51 The tool selects (identifies) features that are different between pairs of treatment groups.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
52 The selection is performed based on the logistic regression with Elastic Net shrinkage (with LASSO being a special case).
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
53 The selection method is defined by shrinkage parameter α.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
54 Variable selection can be performed for any value of α in the range [0, 1], where α = 1 corresponds to the smallest number of variables and the most strict selection criterion (LASSO) and α = 0 corresponds to shrinkage without variable selection (Ridge regression). The default value is α = 0.5.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
55 The best subset of variables for a given α is selected by a cross validation procedure.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
56 Lambda is a penalty parameter determined during the cross validation procedure.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
57
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
58 More details about the Elastic Net and LASSO methods can be found in the references below:
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
59
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
60 Zou, H., and Hastie, T. (2005). Regularization and variable selection via the elastic net. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 67(2), 301-320.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
61
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
62 Tibshirani, Robert. "Regression shrinkage and selection via the lasso." Journal of the Royal Statistical Society. Series B (Methodological) (1996): 267-288.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
63
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
64 Friedman, Jerome, Trevor Hastie, and Rob Tibshirani. "Regularization paths for generalized linear models via coordinate descent." Journal of statistical software 33, no. 1 (2010): 1.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
65
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
66 --------------------------------------------------------------------------------
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
67
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
68 **Note**
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
69
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
70 - This tool currently treats all variables as continuous numeric
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
71 variables. Running the tool on categorical variables might result in
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
72 incorrect results.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
73 - Rows containing non-numeric (or missing) data will be excluded.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
74
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
75 --------------------------------------------------------------------------------
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
76
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
77 **Input**
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
78
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
79 - Two input datasets are required.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
80
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
81 @WIDE@
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
82
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
83 **NOTE:** The sample IDs must match the sample IDs in the Design File (below).
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
84 Extra columns will automatically be ignored.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
85
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
86 @METADATA@
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
87
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
88 @UNIQID@
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
89
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
90 @GROUP@
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
91
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
92 **shrinkage parameter α**
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
93
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
94 - Specifies the penalty for the LASSO/Elastic Net procedure. Default = 0.5
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
95
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
96 --------------------------------------------------------------------------------
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
97
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
98 **Output**
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
99
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
100 This tool outputs three files:
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
101
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
102 (1) A TSV file containing the values of the coefficients (including zeroes) for each feature generated by the tool for each pair of comparisons (in columns). These coefficients are produced from the transformed data (as part of the LASSO/EN method) and should be interpreted with caution.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
103
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
104 (2) A TSV file containing the corresponding flags for each feature where the value “1” corresponds to features selected by the method.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
105
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
106 (3) A PDF file containing graphs for each pairwise comparison between the groups. The first graph displays the behavior of the coefficients based on the value of the penalty parameter lambda and the shrinkage parameter α. The second graph provides details of the cross-validation procedure used for detection of the optimal penalty and for feature selection.
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
107
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
108 ]]></help>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
109 <expand macro="citations"/>
2e7d47c0b027 "planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff changeset
110 </tool>