annotate tools/human_genome_variation/lps.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="hgv_lps" name="LPS" version="1.0.0">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <description>LASSO-Patternsearch algorithm</description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 <command interpreter="bash">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 lps_tool_wrapper.sh $lambda_fac $input_file $label_column $output_file $log_file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 Initialization 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 #if $advanced.options == "true":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 Sample $advanced.sample
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 Verbosity $advanced.verbosity
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 Standardize $advanced.standardize
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 initialLambda $advanced.initialLambda
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 #if $advanced.continuation.continuation == "1":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 Continuation $advanced.continuation.continuation
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 continuationSteps $advanced.continuation.continuationSteps
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 accurateIntermediates $advanced.continuation.accurateIntermediates
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 printFreq $advanced.printFreq
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 #if $advanced.newton.newton == "1":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 Newton $advanced.newton.newton
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 NewtonThreshold $advanced.newton.newtonThreshold
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 HessianSampleFraction $advanced.hessianSampleFraction
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 BB 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 Monotone 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 FullGradient $advanced.fullGradient
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 GradientFraction $advanced.gradientFraction
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 InitialAlpha $advanced.initialAlpha
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 AlphaIncrease $advanced.alphaIncrease
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 AlphaDecrease $advanced.alphaDecrease
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 AlphaMax $advanced.alphaMax
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 c1 $advanced.c1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 MaxIter $advanced.maxIter
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 StopTol $advanced.stopTol
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 IntermediateTol $advanced.intermediateTol
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 FinalOnly $advanced.finalOnly
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 <param name="input_file" type="data" format="tabular" label="Dataset"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 <param name="label_column" type="data_column" data_ref="input_file" numerical="true" label="Label column" help="Column containing outcome labels: +1 or -1."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 <param name="lambda_fac" label="Lambda_fac" type="float" value="0.03" help="Target value of the regularization parameter, expressed as a fraction of the calculated lambda_max.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 <validator type="in_range" message="0.00 &lt; lambda_fac &lt;= 1.00" min="0.00" max="1.00"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 <conditional name="advanced">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 <param name="options" type="select" label="Advanced Options">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 <option value="false" selected="true">Hide advanced options</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 <option value="true">Show advanced options</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 <when value="false">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 <!-- no options -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 <when value="true">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 <!-- HARDCODED: 'Sample' we don't support passing an array -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 <param name="sample" type="float" value="1.0" label="Sample fraction" help="Sample this fraction of the data set.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 <validator type="in_range" message="0.0 &lt;= sample &lt;= 1.0" min="0.0" max="1.0"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 <!-- HARDCODED: 'Initialization' = 0 :: Initialize at beta=0 -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 <param name="verbosity" type="select" format="integer" label="Verbosity">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 <option value="0" selected="true">Little output</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 <option value="1">More output</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 <option value="2">Still more output</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 <param name="standardize" type="select" format="integer" label="Standardize" help="Scales and shifts each column so that it has mean zero and variance 1.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 <option value="0" selected="true">Don't standardize</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 <option value="1">Standardize</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 <param name="initialLambda" type="float" value="0.8" label="Initial lambda" help="First value of lambda to be used in the continuation scheme, expressed as a fraction of lambda_max.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 <validator type="in_range" message="0.0 &lt; initialLambda &lt; 1.0" min="0.0" max="1.0"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 <conditional name="continuation">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 <param name="continuation" type="select" format="integer" label="Continuation" help="Use continuation strategy to start with a larger value of lambda, decreasing it successively to lambda_fac.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 <option value="0" selected="true">Don't use continuation</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 <option value="1">Use continuation</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 <when value="0">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 <!-- no options -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 <when value="1">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 <param name="continuationSteps" type="integer" value="5" label="Continuation steps" help="Number of lambda values to use in continuation &lt;em&gt;prior&lt;/em&gt; to target value lambda_fac."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 <param name="accurateIntermediates" type="select" format="integer" label="Accurate intermediates" help="Indicates whether accurate solutions are required for lambda values other than the target value lambda_fac.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 <option value="0" selected="true">Don't need accurate intermediates</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 <option value="1">Calculate accurate intermediates</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 </conditional> <!-- name="continuation" -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 <param name="printFreq" type="integer" value="1" label="Print frequency" help="Print a progress report every NI iterations, where NI is the supplied value of this parameter.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 <validator type="in_range" message="printFreq &gt;= 1" min="1"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 <conditional name="newton">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 <param name="newton" type="select" format="integer" label="Projected Newton steps">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 <option value="0" selected="true">No Newton steps</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 <option value="1">Try projected Newton steps</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 <when value="0">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 <!-- no options -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 <when value="1">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 <param name="newtonThreshold" type="integer" value="500" label="Newton threshold" help="Maximum size of free variable subvector for Newton."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 <param name="hessianSampleFraction" type="float" value="1.0" label="Hessian sample fraction" help="Fraction of terms to use in approximate Hessian calculation.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 <validator type="in_range" message="0.01 &lt; hessianSampleFraction &lt;= 1.00" min="0.01" max="1.00"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 <!-- HARDCODED: 'BB' = 0 :: don't use Barzilai-Borwein steps -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 <!-- HARDCODED: 'Monotone' = 0 :: don't force monotonicity -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 <param name="fullGradient" type="select" format="integer" label="Partial gradient vector selection">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 <option value="0">Use randomly selected partial gradient, including current active components ("biased")</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 <option value="1">Use full gradient vector at every step</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 <option value="2">Randomly selected partial gradient, without regard to current active set ("unbiased")</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 <param name="gradientFraction" type="float" value="0.1" label="Gradient fraction" help="Fraction of inactive gradient vector to evaluate.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 <validator type="in_range" message="0.0 &lt; gradientFraction &lt;= 1" min="0.0" max="1.0"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 <param name="initialAlpha" type="float" value="1.0" label="Initial value of alpha"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 <param name="alphaIncrease" type="float" value="2.0" label="Alpha increase" help="Factor by which to increase alpha after descent not obtained."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 <param name="alphaDecrease" type="float" value="0.8" label="Alpha decrease" help="Factor by which to decrease alpha after successful first-order step."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 <param name="alphaMax" type="float" value="1e12" label="Alpha max" help="Maximum value of alpha; terminate with error if we exceed this."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 <param name="c1" type="float" value="1e-3" help="Parameter defining the margin by which the first-order step is required to decrease before being taken.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 <validator type="in_range" message="0.0 &lt; c1 &lt; 1.0" min="0.0" max="1.0"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 <param name="maxIter" type="integer" value="10000" label="Maximum number of iterations" help="Terminate with error if we exceed this."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 <param name="stopTol" type="float" value="1e-6" label="Stop tolerance" help="Convergence tolerance for target value of lambda."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 <param name="intermediateTol" type="float" value="1e-4" label="Intermediate tolerance" help="Convergence tolerance for intermediate values of lambda."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 <param name="finalOnly" type="select" format="integer" label="Final only">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 <option value="0" selected="true">Return information for all intermediate values</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 <option value="1">Just return information at the last lambda</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 </when> <!-- value="true" (advanced options shown) -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 </conditional> <!-- name="advanced" -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: results"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 <data name="log_file" format="txt" label="${tool.name} on ${on_string}: log"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 <requirements>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 <requirement type="package">lps_tool</requirement>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141 </requirements>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143 <tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145 <param name="input_file" value="lps_arrhythmia.tabular"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146 <param name="label_column" value="280"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 <param name="lambda_fac" value="0.03"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 <param name="options" value="true"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 <param name="sample" value="1.0"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150 <param name="verbosity" value="1"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151 <param name="standardize" value="0"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 <param name="initialLambda" value="0.9"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153 <param name="continuation" value="1"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154 <param name="continuationSteps" value="10"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155 <param name="accurateIntermediates" value="0"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 <param name="printFreq" value="1"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157 <param name="newton" value="1"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 <param name="newtonThreshold" value="500"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 <param name="hessianSampleFraction" value="1.0"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160 <param name="fullGradient" value="1"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 <param name="gradientFraction" value="0.5"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162 <param name="initialAlpha" value="1.0"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 <param name="alphaIncrease" value="2.0"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164 <param name="alphaDecrease" value="0.8"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165 <param name="alphaMax" value="1e12"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166 <param name="c1" value="1e-3"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167 <param name="maxIter" value="2500"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168 <param name="stopTol" value="1e-6"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169 <param name="intermediateTol" value="1e-6"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170 <param name="finalOnly" value="0"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 <output name="output_file" file="lps_arrhythmia_beta.tabular"/> <!-- name must match the "output_file" data output declared in outputs -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172 <output name="log_file" file="lps_arrhythmia_log.txt"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174 </tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
175
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
176 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
177 **Dataset formats**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
178
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
179 The input and output datasets are tabular_. The columns are described below.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
180 There is a second output dataset (a log) that is in text_ format.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
181 (`Dataset missing?`_)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
182
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
183 .. _tabular: ./static/formatHelp.html#tab
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
184 .. _text: ./static/formatHelp.html#text
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
185 .. _Dataset missing?: ./static/formatHelp.html
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
186
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
187 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
188
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
189 **What it does**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
190
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
191 The LASSO-Patternsearch algorithm fits your dataset to an L1-regularized
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
192 logistic regression model. A benefit of using L1-regularization is
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
193 that it typically yields a weight vector with relatively few non-zero
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
194 coefficients.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
195
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
196 For example, say you have a dataset containing M rows (subjects)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
197 and N columns (attributes) where one of these N attributes is binary,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
198 indicating whether or not the subject has some property of interest P.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
199 In simple terms, LPS calculates a weight for each of the other attributes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
200 in your dataset. This weight indicates how "relevant" that attribute
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
201 is for predicting whether or not a given subject has property P.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
202 The L1-regularization causes most of these weights to be equal to zero,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
203 which means LPS will find a "small" subset of the remaining N-1 attributes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
204 in your dataset that can be used to predict P.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
205
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
206 In other words, LPS can be used for feature selection.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
207
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
208 The input dataset is tabular, and must contain a label column which
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
209 indicates whether or not a given row has property P. In the current
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
210 version of this tool, P must be encoded using +1 and -1. The Lambda_fac
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
211 parameter ranges from 0 to 1, and controls how sparse the weight
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
212 vector will be. At the low end, when Lambda_fac = 0, there will be
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
213 no regularization. At the high end, when Lambda_fac = 1, there will be
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
214 "too much" regularization, and all of the weights will equal zero.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
215
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
216 The LPS tool creates two output datasets. The first, called the results
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
217 file, is a tabular dataset containing one column of weights for each
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
218 value of the regularization parameter lambda that was tried. The weight
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
219 columns are in order from left to right by decreasing values of lambda.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
220 The first N-1 rows in each column are the weights for the N-1 attributes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
221 in your input dataset. The final row is a constant, the intercept.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
222
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
223 Let **x** be a row from your input dataset and let **b** be a column
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
224 from the results file. To compute the probability that row **x** has
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
225 a label value of +1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
226
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
227 Probability(row **x** has label value = +1) = 1 / [1 + exp{**x** \* **b**\[1..N-1\] + **b**\[N\]}]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
228
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
229 where **x** \* **b**\[1..N-1\] represents matrix multiplication.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
230
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
231 The second output dataset, called the log file, is a text file which
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
232 contains additional data about the fitted L1-regularized logistic
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
233 regression model. These data include the number of features, the
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
234 computed value of lambda_max, the actual values of lambda used, the
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
235 optimal values of the log-likelihood and regularized log-likelihood
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
236 functions, the number of non-zeros, and the number of iterations.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
237
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
238 Website: http://pages.cs.wisc.edu/~swright/LPS/
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
239
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
240 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
241
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
242 **Example**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
243
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
244 - input file::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
245
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
246 +1 1 0 0 0 0 1 0 1 1 ...
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
247 +1 1 1 1 0 0 1 0 1 1 ...
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
248 +1 1 0 1 0 1 0 1 0 1 ...
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
249 etc.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
250
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
251 - output results file::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
252
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
253 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
254 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
255 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
256 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
257 0.025541
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
258 etc.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
259
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
260 - output log file::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
261
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
262 Data set has 100 vectors with 50 features.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
263 calculateLambdaMax: n=50, m=100, m+=50, m-=50
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
264 computed value of lambda_max: 5.0000e-01
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
265
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
266 lambda=2.96e-02 solution:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
267 optimal log-likelihood function value: 6.46e-01
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
268 optimal *regularized* log-likelihood function value: 6.79e-01
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
269 number of nonzeros at the optimum: 5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
270 number of iterations required: 43
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
271 etc.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
272
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
273 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
274
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
275 **References**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
276
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
277 Koh K, Kim S-J, Boyd S. (2007)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
278 An interior-point method for large-scale l1-regularized logistic regression.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
279 Journal of Machine Learning Research. 8:1519-1555.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
280
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
281 Shi W, Wahba G, Wright S, Lee K, Klein R, Klein B. (2008)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
282 LASSO-Patternsearch algorithm with application to ophthalmology and genomic data.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
283 Stat Interface. 1(1):137-153.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
284
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
285 <!--
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
286 Wright S, Nowak R, Figueiredo M. (2009)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
287 Sparse reconstruction via separable approximation.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
288 IEEE Transactions on Signal Processing. 57:2479-2493.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
289
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
290 Shi J, Yin W, Osher S, Sajda P. (2010)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
291 A fast hybrid algorithm for large scale l1-regularized logistic regression.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
292 Journal of Machine Learning Research. 11:713-741.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
293
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
294 Byrd R, Chin G, Neveitt W, Nocedal J. (2010)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
295 On the use of stochastic Hessian information in unconstrained optimization.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
296 Technical Report. Northwestern University. June 16, 2010.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
297
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
298 Wright S. (2010)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
299 Accelerated block-coordinate relaxation for regularized optimization.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
300 Technical Report. University of Wisconsin. August 10, 2010.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
301 -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
302
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
303 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
304 </tool>