Mercurial > repos > xuebing > sharplabtool
comparison tools/human_genome_variation/lps.xml @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 <tool id="hgv_lps" name="LPS" version="1.0.0"> | |
2 <description>LASSO-Patternsearch algorithm</description> | |
3 | |
4 <command interpreter="bash"> | |
5 lps_tool_wrapper.sh $lambda_fac $input_file $label_column $output_file $log_file | |
6 Initialization 0 | |
7 #if $advanced.options == "true": | |
8 Sample $advanced.sample | |
9 Verbosity $advanced.verbosity | |
10 Standardize $advanced.standardize | |
11 initialLambda $advanced.initialLambda | |
12 #if $advanced.continuation.continuation == "1": | |
13 Continuation $advanced.continuation.continuation | |
14 continuationSteps $advanced.continuation.continuationSteps | |
15 accurateIntermediates $advanced.continuation.accurateIntermediates | |
16 #end if | |
17 printFreq $advanced.printFreq | |
18 #if $advanced.newton.newton == "1": | |
19 Newton $advanced.newton.newton | |
20 NewtonThreshold $advanced.newton.newtonThreshold | |
21 #end if | |
22 HessianSampleFraction $advanced.hessianSampleFraction | |
23 BB 0 | |
24 Monotone 0 | |
25 FullGradient $advanced.fullGradient | |
26 GradientFraction $advanced.gradientFraction | |
27 InitialAlpha $advanced.initialAlpha | |
28 AlphaIncrease $advanced.alphaIncrease | |
29 AlphaDecrease $advanced.alphaDecrease | |
30 AlphaMax $advanced.alphaMax | |
31 c1 $advanced.c1 | |
32 MaxIter $advanced.maxIter | |
33 StopTol $advanced.stopTol | |
34 IntermediateTol $advanced.intermediateTol | |
35 FinalOnly $advanced.finalOnly | |
36 #end if | |
37 </command> | |
38 | |
39 <inputs> | |
40 <param name="input_file" type="data" format="tabular" label="Dataset"/> | |
41 <param name="label_column" type="data_column" data_ref="input_file" numerical="true" label="Label column" help="Column containing outcome labels: +1 or -1."/> | |
42 <param name="lambda_fac" label="Lambda_fac" type="float" value="0.03" help="Target value of the regularization parameter, expressed as a fraction of the calculated lambda_max."> | |
43 <validator type="in_range" message="0.00 < lambda_fac <= 1.00" min="0.00" max="1.00"/> | |
44 </param> | |
45 <conditional name="advanced"> | |
46 <param name="options" type="select" label="Advanced Options"> | |
47 <option value="false" selected="true">Hide advanced options</option> | |
48 <option value="true">Show advanced options</option> | |
49 </param> | |
50 <when value="false"> | |
51 <!-- no options --> | |
52 </when> | |
53 <when value="true"> | |
54 <!-- HARDCODED: 'Sample' we don't support passing an array --> | |
55 <param name="sample" type="float" value="1.0" label="Sample fraction" help="Sample this fraction of the data set."> | |
56 <validator type="in_range" message="0.0 <= sample <= 1.0" min="0.0" max="1.0"/> | |
57 </param> | |
58 <!-- HARDCODED: 'Initialization' = 0 :: Initialize at beta=0 --> | |
59 <param name="verbosity" type="select" format="integer" label="Verbosity"> | |
60 <option value="0" selected="true">Little output</option> | |
61 <option value="1">More output</option> | |
62 <option value="2">Still more output</option> | |
63 </param> | |
64 <param name="standardize" type="select" format="integer" label="Standardize" help="Scales and shifts each column so that it has mean zero and variance 1."> | |
65 <option value="0" selected="true">Don't standardize</option> | |
66 <option value="1">Standardize</option> | |
67 </param> | |
68 <param name="initialLambda" type="float" value="0.8" label="Initial lambda" help="First value of lambda to be used in the continuation scheme, expressed as a fraction of lambda_max."> | |
69 <validator type="in_range" message="0.0 < initialLambda < 1.0" min="0.0" max="1.0"/> | |
70 </param> | |
71 <conditional name="continuation"> | |
72 <param name="continuation" type="select" format="integer" label="Continuation" help="Use continuation strategy to start with a larger value of lambda, decreasing it successively to lambda_fac."> | |
73 <option value="0" selected="true">Don't use continuation</option> | |
74 <option value="1">Use continuation</option> | |
75 </param> | |
76 <when value="0"> | |
77 <!-- no options --> | |
78 </when> | |
79 <when value="1"> | |
80 <param name="continuationSteps" type="integer" value="5" label="Continuation steps" help="Number of lambda values to use in continuation <em>prior</em> to target value lambda_fac."/> | |
81 | |
82 <param name="accurateIntermediates" type="select" format="integer" label="Accurate intermediates" help="Indicates whether accurate solutions are required for lambda values other than the target value lambda_fac."> | |
83 <option value="0" selected="true">Don't need accurate intermediates</option> | |
84 <option value="1">Calculate accurate intermediates</option> | |
85 </param> <!-- name="accurateIntermediates" --> | |
86 </when> | |
87 </conditional> <!-- name="continuation" --> | |
88 <param name="printFreq" type="integer" value="1" label="Print frequency" help="Print a progress report every NI iterations, where NI is the supplied value of this parameter."> | |
89 <validator type="in_range" message="printFreq >= 1" min="1"/> | |
90 </param> | |
91 <conditional name="newton"> | |
92 <param name="newton" type="select" format="integer" label="Projected Newton steps"> | |
93 <option value="0" selected="true">No Newton steps</option> | |
94 <option value="1">Try projected Newton steps</option> | |
95 </param> | |
96 <when value="0"> | |
97 <!-- no options --> | |
98 </when> | |
99 <when value="1"> | |
100 <param name="newtonThreshold" type="integer" value="500" label="Newton threshold" help="Maximum size of free variable subvector for Newton."/> | |
101 </when> | |
102 </conditional> | |
103 <param name="hessianSampleFraction" type="float" value="1.0" label="Hessian sample fraction" help="Fraction of terms to use in approximate Hessian calculation."> | |
104 <validator type="in_range" message="0.01 < hessianSampleFraction <= 1.00" min="0.01" max="1.00"/> | |
105 </param> | |
106 <!-- HARDCODED: 'BB' = 0 :: don't use Barzilai-Borwein steps --> | |
107 <!-- HARDCODED: 'Monotone' = 0 :: don't force monotonicity --> | |
108 <param name="fullGradient" type="select" format="integer" label="Partial gradient vector selection"> | |
109 <option value="0">Use randomly selected partial gradient, including current active components ("biased")</option> | |
110 <option value="1">Use full gradient vector at every step</option> | |
111 <option value="2">Randomly selected partial gradient, without regard to current active set ("unbiased")</option> | |
112 </param> | |
113 <param name="gradientFraction" type="float" value="0.1" label="Gradient fraction" help="Fraction of inactive gradient vector to evaluate."> | |
114 <validator type="in_range" message="0.0 < gradientFraction <= 1" min="0.0" max="1.0"/> | |
115 </param> | |
116 <param name="initialAlpha" type="float" value="1.0" label="Initial value of alpha"/> | |
117 <param name="alphaIncrease" type="float" value="2.0" label="Alpha increase" help="Factor by which to increase alpha after descent not obtained."/> | |
118 <param name="alphaDecrease" type="float" value="0.8" label="Alpha decrease" help="Factor by which to decrease alpha after successful first-order step."/> | |
119 <param name="alphaMax" type="float" value="1e12" label="Alpha max" help="Maximum value of alpha; terminate with error if we exceed this."/> | |
120 <param name="c1" type="float" value="1e-3" help="Parameter defining the margin by which the first-order step is required to decrease before being taken."> | |
121 <validator type="in_range" message="0.0 < c1 < 1.0" min="0.0" max="1.0"/> | |
122 </param> | |
123 <param name="maxIter" type="integer" value="10000" label="Maximum number of iterations" help="Terminate with error if we exceed this."/> | |
124 <param name="stopTol" type="float" value="1e-6" label="Stop tolerance" help="Convergence tolerance for target value of lambda."/> | |
125 <param name="intermediateTol" type="float" value="1e-4" label="Intermediate tolerance" help="Convergence tolerance for intermediate values of lambda."/> | |
126 <param name="finalOnly" type="select" format="integer" label="Final only"> | |
127 <option value="0" selected="true">Return information for all intermediate values</option> | |
128 <option value="1">Just return information at the last lambda</option> | |
129 </param> | |
130 </when> <!-- value="true" --> | |
131 </conditional> <!-- name="advanced" --> | |
132 </inputs> | |
133 | |
134 <outputs> | |
135 <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: results"/> | |
136 <data name="log_file" format="txt" label="${tool.name} on ${on_string}: log"/> | |
137 </outputs> | |
138 | |
139 <requirements> | |
140 <requirement type="package">lps_tool</requirement> | |
141 </requirements> | |
142 | |
143 <tests> | |
144 <test> | |
145 <param name="input_file" value="lps_arrhythmia.tabular"/> | |
146 <param name="label_column" value="280"/> | |
147 <param name="lambda_fac" value="0.03"/> | |
148 <param name="options" value="true"/> | |
149 <param name="sample" value="1.0"/> | |
150 <param name="verbosity" value="1"/> | |
151 <param name="standardize" value="0"/> | |
152 <param name="initialLambda" value="0.9"/> | |
153 <param name="continuation" value="1"/> | |
154 <param name="continuationSteps" value="10"/> | |
155 <param name="accurateIntermediates" value="0"/> | |
156 <param name="printFreq" value="1"/> | |
157 <param name="newton" value="1"/> | |
158 <param name="newtonThreshold" value="500"/> | |
159 <param name="hessianSampleFraction" value="1.0"/> | |
160 <param name="fullGradient" value="1"/> | |
161 <param name="gradientFraction" value="0.5"/> | |
162 <param name="initialAlpha" value="1.0"/> | |
163 <param name="alphaIncrease" value="2.0"/> | |
164 <param name="alphaDecrease" value="0.8"/> | |
165 <param name="alphaMax" value="1e12"/> | |
166 <param name="c1" value="1e-3"/> | |
167 <param name="maxIter" value="2500"/> | |
168 <param name="stopTol" value="1e-6"/> | |
169 <param name="intermediateTol" value="1e-6"/> | |
170 <param name="finalOnly" value="0"/> | |
171 <output name="output_file" file="lps_arrhythmia_beta.tabular"/> <!-- name must match the output_file dataset declared in outputs --> | |
172 <output name="log_file" file="lps_arrhythmia_log.txt"/> | |
173 </test> | |
174 </tests> <!-- functional test: advanced options with continuation and Newton steps enabled --> | |
175 | |
176 <help> | |
177 **Dataset formats** | |
178 | |
179 The input and output datasets are tabular_. The columns are described below. | |
180 There is a second output dataset (a log) that is in text_ format. | |
181 (`Dataset missing?`_) | |
182 | |
183 .. _tabular: ./static/formatHelp.html#tab | |
184 .. _text: ./static/formatHelp.html#text | |
185 .. _Dataset missing?: ./static/formatHelp.html | |
186 | |
187 ----- | |
188 | |
189 **What it does** | |
190 | |
191 The LASSO-Patternsearch algorithm fits your dataset to an L1-regularized | |
192 logistic regression model. A benefit of using L1-regularization is | |
193 that it typically yields a weight vector with relatively few non-zero | |
194 coefficients. | |
195 | |
196 For example, say you have a dataset containing M rows (subjects) | |
197 and N columns (attributes) where one of these N attributes is binary, | |
198 indicating whether or not the subject has some property of interest P. | |
199 In simple terms, LPS calculates a weight for each of the other attributes | |
200 in your dataset. This weight indicates how "relevant" that attribute | |
201 is for predicting whether or not a given subject has property P. | |
202 The L1-regularization causes most of these weights to be equal to zero, | |
203 which means LPS will find a "small" subset of the remaining N-1 attributes | |
204 in your dataset that can be used to predict P. | |
205 | |
206 In other words, LPS can be used for feature selection. | |
207 | |
208 The input dataset is tabular, and must contain a label column which | |
209 indicates whether or not a given row has property P. In the current | |
210 version of this tool, P must be encoded using +1 and -1. The Lambda_fac | |
211 parameter ranges from 0 to 1, and controls how sparse the weight | |
212 vector will be. At the low end, when Lambda_fac = 0, there will be | |
213 no regularization. At the high end, when Lambda_fac = 1, there will be | |
214 "too much" regularization, and all of the weights will equal zero. | |
215 | |
216 The LPS tool creates two output datasets. The first, called the results | |
217 file, is a tabular dataset containing one column of weights for each | |
218 value of the regularization parameter lambda that was tried. The weight | |
219 columns are in order from left to right by decreasing values of lambda. | |
220 The first N-1 rows in each column are the weights for the N-1 attributes | |
221 in your input dataset. The final row is a constant, the intercept. | |
222 | |
223 Let **x** be a row from your input dataset and let **b** be a column | |
224 from the results file. To compute the probability that row **x** has | |
225 a label value of +1: | |
226 | |
227 Probability(row **x** has label value = +1) = 1 / [1 + exp{**x** \* **b**\[1..N-1\] + **b**\[N\]}] | |
228 | |
229 where **x** \* **b**\[1..N-1\] represents matrix multiplication. | |
230 | |
231 The second output dataset, called the log file, is a text file which | |
232 contains additional data about the fitted L1-regularized logistic | |
233 regression model. These data include the number of features, the | |
234 computed value of lambda_max, the actual values of lambda used, the | |
235 optimal values of the log-likelihood and regularized log-likelihood | |
236 functions, the number of non-zeros, and the number of iterations. | |
237 | |
238 Website: http://pages.cs.wisc.edu/~swright/LPS/ | |
239 | |
240 ----- | |
241 | |
242 **Example** | |
243 | |
244 - input file:: | |
245 | |
246 +1 1 0 0 0 0 1 0 1 1 ... | |
247 +1 1 1 1 0 0 1 0 1 1 ... | |
248 +1 1 0 1 0 1 0 1 0 1 ... | |
249 etc. | |
250 | |
251 - output results file:: | |
252 | |
253 0 | |
254 0 | |
255 0 | |
256 0 | |
257 0.025541 | |
258 etc. | |
259 | |
260 - output log file:: | |
261 | |
262 Data set has 100 vectors with 50 features. | |
263 calculateLambdaMax: n=50, m=100, m+=50, m-=50 | |
264 computed value of lambda_max: 5.0000e-01 | |
265 | |
266 lambda=2.96e-02 solution: | |
267 optimal log-likelihood function value: 6.46e-01 | |
268 optimal *regularized* log-likelihood function value: 6.79e-01 | |
269 number of nonzeros at the optimum: 5 | |
270 number of iterations required: 43 | |
271 etc. | |
272 | |
273 ----- | |
274 | |
275 **References** | |
276 | |
277 Koh K, Kim S-J, Boyd S. (2007) | |
278 An interior-point method for large-scale l1-regularized logistic regression. | |
279 Journal of Machine Learning Research. 8:1519-1555. | |
280 | |
281 Shi W, Wahba G, Wright S, Lee K, Klein R, Klein B. (2008) | |
282 LASSO-Patternsearch algorithm with application to ophthalmology and genomic data. | |
283 Stat Interface. 1(1):137-153. | |
284 | |
285 <!-- | |
286 Wright S, Nowak R, Figueiredo M. (2009) | |
287 Sparse reconstruction by separable approximation. | |
288 IEEE Transactions on Signal Processing. 57:2479-2493. | |
289 | |
290 Shi J, Yin W, Osher S, Sajda P. (2010) | |
291 A fast hybrid algorithm for large scale l1-regularized logistic regression. | |
292 Journal of Machine Learning Research. 11:713-741. | |
293 | |
294 Byrd R, Chin G, Neveitt W, Nocedal J. (2010) | |
295 On the use of stochastic Hessian information in unconstrained optimization. | |
296 Technical Report. Northwestern University. June 16, 2010. | |
297 | |
298 Wright S. (2010) | |
299 Accelerated block-coordinate relaxation for regularized optimization. | |
300 Technical Report. University of Wisconsin. August 10, 2010. | |
301 --> | |
302 | |
303 </help> | |
304 </tool> |