Mercurial > repos > malex > secimtools
annotate lasso_enet_var_select.xml @ 2:caba07f41453 draft default tip
"planemo upload for repository https://github.com/secimTools/SECIMTools/tree/main/galaxy commit 498abad641099412df56f04ff6e144e4193bbc34-dirty"
author | malex |
---|---|
date | Thu, 10 Jun 2021 15:41:17 +0000 |
parents | 2e7d47c0b027 |
children |
rev | line source |
---|---|
1
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
1 <tool id="secimtools_lasso_enet_var_select" name="LASSO/Elastic Net Variable Selection," version="@WRAPPER_VERSION@"> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
2 <description>for feature selection.</description> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
3 <macros> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
4 <import>macros.xml</import> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
5 </macros> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
6 <expand macro="requirements" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
7 <stdio> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
8 <exit_code range="1:" level="warning" description="RuntimeWarning"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
9 </stdio> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
10 <command><![CDATA[ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
11 lasso_enet_var_select.py |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
12 --input $input |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
13 --design $design |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
14 --ID $uniqID |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
15 --group $group |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
16 --alpha $alpha |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
17 --coefficients $coefficients |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
18 --flags $flags |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
19 --plots $plots |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
20 ]]></command> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
21 <inputs> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
22 <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab separated see TIP below."/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
23 <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
24 <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that has unique identifiers."/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
25 <param name="group" type="text" size="30" label="Group/Treatment" help="Name of the column in your design file that contains group classifications."/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
26 <param name="alpha" type="text" value=".5" size="30" label="shrinkage parameter α" help="Shrinkage parameter α specifies the penalty for the LASSO/Elastic Net procedure. Default 0.5"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
27 </inputs> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
28 <outputs> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
29 <data format="tabular" name="coefficients" label="${tool.name} on ${on_string}: Coefficients"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
30 <data format="tabular" name="flags" label="${tool.name} on ${on_string}: Flags"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
31 <data format="pdf" name="plots" label="${tool.name} on ${on_string}: Plots"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
32 </outputs> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
33 <tests> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
34 <test> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
35 <param name="input" value="ST000006_data.tsv"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
36 <param name="design" value="ST000006_design.tsv"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
37 <param name="uniqID" value="Retention_Index" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
38 <param name="group" value="White_wine_type_and_source" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
39 <param name="alpha" value="0.5" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
40 <output name="coefficients" file="ST000006_lasso_enet_var_select_coefficients.tsv" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
41 <output name="flags" file="ST000006_lasso_enet_var_select_flags.tsv" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
42 <output name="plots" file="ST000006_lasso_enet_var_select_plots.pdf" compare="sim_size" delta="10000" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
43 </test> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
44 </tests> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
45 <help><![CDATA[ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
46 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
47 @TIP_AND_WARNING@ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
48 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
49 **Tool Description** |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
50 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
51 The tool selects (identifies) features that are different between pairs of treatment groups. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
52 The selection is performed based on the logistic regression with Elastic Net shrinkage (with LASSO being a special case). |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
53 The selection method is defined by shrinkage parameter α. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
54 Variable selection can be performed for any value of α in the range [0:1] where α = 1 corresponds to the fewest number of variables and the most strict selection criterion (LASSO) and α = 0 corresponds to shrinkage without variable selection (Ridge regression). The default value is α = 0.5. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
55 The best subset of variables for a given α is selected by a cross validation procedure. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
56 Lambda is a penalty parameter determined during the cross validation procedure. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
57 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
58 More details about the Elastic Net and LASSO methods can be found in the reference below: |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
59 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
60 Zou, H., and Hastie, T. (2005). Regularization and variable selection via the elastic net. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 67(2), 301-320. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
61 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
62 Tibshirani, Robert. "Regression shrinkage and selection via the lasso." Journal of the Royal Statistical Society. Series B (Methodological) (1996): 267-288. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
63 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
64 Friedman, Jerome, Trevor Hastie, and Rob Tibshirani. "Regularization paths for generalized linear models via coordinate descent." Journal of statistical software 33, no. 1 (2010): 1. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
65 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
66 -------------------------------------------------------------------------------- |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
67 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
68 **Note** |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
69 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
70 - This tool currently treats all variables as continuous numeric |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
71 variables. Running the tool on categorical variables might result in |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
72 incorrect results. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
73 - Rows containing non-numeric (or missing) data will be excluded. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
74 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
75 -------------------------------------------------------------------------------- |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
76 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
77 **Input** |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
78 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
79 - Two input datasets are required. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
80 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
81 @WIDE@ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
82 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
83 **NOTE:** The sample IDs must match the sample IDs in the Design File (below). |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
84 Extra columns will automatically be ignored. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
85 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
86 @METADATA@ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
87 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
88 @UNIQID@ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
89 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
90 @GROUP@ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
91 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
92 **shrinkage parameter α** |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
93 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
94 - Specifies the penalty for the LASSO/Elastic Net procedure. Default = 0.5 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
95 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
96 -------------------------------------------------------------------------------- |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
97 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
98 **Output** |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
99 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
100 This tool outputs three files: |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
101 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
102 (1) A TSV file containing the values of the coefficients (including zeroes) for each feature generated by the tool for each pair of comparisons (in columns). These coefficients are produced from the transformed data (as part of the LASSO/EN method) and should be interpreted with caution. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
103 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
104 (2) A TSV file containing the corresponding flags for each feature where the value “1” corresponds to features selected by the method. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
105 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
106 (3) A PDF file containing graphs for each pairwise comparison between the groups. The first graph displays the behavior of the coefficients based on the value of penalty parameter lambda and the shrinkage parameter α. The second graph provides details of the cross-validation procedure used for detection of the optimal penalty and for feature selection. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
107 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
108 ]]></help> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
109 <expand macro="citations"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
110 </tool> |