Mercurial > repos > bgruening > sklearn_model_validation
comparison model_validation.xml @ 0:333507faecab draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2e1e78576b38110cf5b1f2ed83b08b9c3a6cbfee
author | bgruening |
---|---|
date | Sat, 28 Apr 2018 18:10:26 -0400 |
parents | |
children | dd502cb0d567 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:333507faecab |
---|---|
1 <tool id="sklearn_model_validation" name="Model Validation" version="@VERSION@"> | |
2 <description>evaluates estimator performance by cross-validation</description> | |
3 <macros> | |
4 <import>main_macros.xml</import> | |
5 </macros> | |
6 <expand macro="python_requirements"/> | |
7 <expand macro="macro_stdio"/> | |
8 <version_command>echo "@VERSION@"</version_command> | |
9 <command> | |
10 <![CDATA[ | |
11 python "$sklearn_model_validation_script" '$inputs' | |
12 ]]> | |
13 </command> | |
14 <configfiles> | |
15 <inputs name="inputs" /> | |
16 <configfile name="sklearn_model_validation_script"> | |
17 <![CDATA[ | |
18 import sys | |
19 import json | |
20 import pandas | |
21 import pickle | |
22 import numpy as np | |
23 import sklearn.model_selection | |
24 from sklearn import svm, linear_model, ensemble | |
25 | |
26 @COLUMNS_FUNCTION@ | |
27 | |
## Galaxy configfile script (Cheetah template rendered to Python).
## Purpose: run one sklearn.model_selection validation function on the
## selected estimator and write the chosen result as a headerless
## tab-separated table to the tool output.
## Comments use a double hash so they are valid for both Cheetah and Python.

## The only command-line argument is the path to the JSON dump of the tool
## parameters; close the file as soon as it has been parsed.
input_json_path = sys.argv[1]
with open(input_json_path, "r") as param_handler:
    params = json.load(param_handler)

## Feature matrix X: either selected columns of a tabular file or a whole
## matrix read with mmread.
input_type = params["input_options"]["selected_input"]
if input_type == "tabular":
    header = 'infer' if params["input_options"]["header1"] else None
    X = read_columns(
        "$input_options.infile1",
        "$input_options.col1",
        sep='\t',
        header=header,
        parse_dates=True
    )
else:
    ## mmread is not part of the import list at the top of this script, so it
    ## is imported here to keep this branch from failing with a NameError.
    ## NOTE(review): harmless if the columns macro already imports it.
    from scipy.io import mmread
    with open("$input_options.infile1", 'r') as sparse_handler:
        X = mmread(sparse_handler)

## Target vector y: always read from a tabular file, then flattened to 1-D.
header = 'infer' if params["input_options"]["header2"] else None
y = read_columns(
    "$input_options.infile2",
    "$input_options.col2",
    sep='\t',
    header=header,
    parse_dates=True
)
y = y.ravel()

## Resolve the validation function by name and collect its keyword options.
validator = params["model_validation_functions"]["selected_function"]
validator = getattr(sklearn.model_selection, validator)
options = params["model_validation_functions"]["options"]
## An empty scoring field means "use the estimator's default scorer".
if options.get('scoring') == '':
    options['scoring'] = None

## Pick the estimator expression: the predefined choice, or the user-typed
## one when has_estimator is 'no'.
estimator = params["model_validation_functions"]["estimator"]
if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no':
    estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"]
## SECURITY(review): eval executes arbitrary user-supplied Python here. The
## __dq__ / __sq__ placeholders are turned back into quote characters first.
## Kept as-is because the tool contract is "type a Python expression";
## consider a whitelist-based parser.
estimator = eval(estimator.replace('__dq__', '"').replace("__sq__", "'"))

#if $model_validation_functions.selected_function == 'cross_validate':
## cross_validate returns a dict; keep only the requested entry.
res = validator(estimator, X, y, **options)
rval = res["$model_validation_functions.return_type"]

#elif $model_validation_functions.selected_function == 'learning_curve':
## SECURITY(review): train_sizes is free text evaluated as Python.
options['train_sizes'] = eval(options['train_sizes'])
train_sizes_abs, train_scores, test_scores = validator(estimator, X, y, **options)
## return_type is a fixed select value, so a lookup replaces the former eval.
rval = {
    "train_sizes_abs": train_sizes_abs,
    "train_scores": train_scores,
    "test_scores": test_scores,
}["$model_validation_functions.return_type"]

#elif $model_validation_functions.selected_function == 'permutation_test_score':
score, permutation_scores, pvalue = validator(estimator, X, y, **options)
rval = {
    "score": score,
    "permutation_scores": permutation_scores,
    "pvalue": pvalue,
}["$model_validation_functions.return_type"]
## score and pvalue are scalars; wrap them so DataFrame() accepts them.
if "$model_validation_functions.return_type" in ["score", "pvalue"]:
    rval = [rval]

#elif $model_validation_functions.selected_function == 'validation_curve':
## SECURITY(review): param_range is free text evaluated as Python.
options['param_range'] = eval(options['param_range'])
train_scores, test_scores = validator(estimator, X, y, **options)
rval = {
    "train_scores": train_scores,
    "test_scores": test_scores,
}["$model_validation_functions.return_type"]

#else:
## cross_val_predict and cross_val_score: the return value is written as-is.
rval = validator(estimator, X, y, **options)
#end if

## Write the result as a tab-separated table without header or index.
rval = pandas.DataFrame(rval)
rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False)

91 | |
92 ]]> | |
93 </configfile> | |
94 </configfiles> | |
95 <inputs> | |
96 <conditional name="model_validation_functions"> | |
97 <param name="selected_function" type="select" label="Select a model validation function"> | |
98 <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option> | |
99 <option value="cross_val_predict">cross_val_predict - Generate cross-validated estimates for each input data point</option> | |
100 <option value="cross_val_score">cross_val_score - Evaluate a score by cross-validation</option> | |
101 <option value="learning_curve">learning_curve - Learning curve</option> | |
102 <option value="permutation_test_score">permutation_test_score - Evaluate the significance of a cross-validated score with permutations</option> | |
103 <option value="validation_curve">validation_curve - Validation curve</option> | |
104 </param> | |
105 <when value="cross_validate"> | |
106 <expand macro="feature_selection_estimator" /> | |
107 <conditional name="extra_estimator"> | |
108 <expand macro="feature_selection_extra_estimator" /> | |
109 <expand macro="feature_selection_estimator_choices" /> | |
110 </conditional> | |
111 <section name="options" title="Other Options" expanded="false"> | |
112 <!--groups--> | |
113 <expand macro="model_validation_common_options"/> | |
114 <expand macro="scoring"/> | |
115 <!--fit_params--> | |
116 <expand macro="pre_dispatch"/> | |
117 </section> | |
118 <param name="return_type" type="select" label="Select a return type"> | |
119 <option value="test_score" selected="true">test_score</option> | |
120 <option value="train_score">train_score</option> | |
121 <option value="fit_time">fit_time</option> | |
122 <option value="score_time">score_time</option> | |
123 </param> | |
124 </when> | |
125 <when value="cross_val_predict"> | |
126 <expand macro="feature_selection_estimator" /> | |
127 <conditional name="extra_estimator"> | |
128 <expand macro="feature_selection_extra_estimator" /> | |
129 <expand macro="feature_selection_estimator_choices" /> | |
130 </conditional> | |
131 <section name="options" title="Other Options" expanded="false"> | |
132 <!--groups--> | |
133 <param argument="cv" type="integer" value="" optional="true" label="cv" help="The number of folds in a (Stratified)KFold" /> | |
134 <expand macro="n_jobs"/> | |
135 <expand macro="verbose"/> | |
136 <!--fit_params--> | |
137 <param argument="pre_dispatch" type="integer" value="" optional="true" label="pre_dispatch" help="Controls the number of jobs that get dispatched during parallel execution" /> | |
138 <param argument="method" type="select" label="Invokes the passed method name of the passed estimator"> | |
139 <option value="predict" selected="true">predict</option> | |
140 <option value="predict_proba">predict_proba</option> | |
141 </param> | |
142 </section> | |
143 </when> | |
144 <when value="cross_val_score"> | |
145 <expand macro="feature_selection_estimator" /> | |
146 <conditional name="extra_estimator"> | |
147 <expand macro="feature_selection_extra_estimator" /> | |
148 <expand macro="feature_selection_estimator_choices" /> | |
149 </conditional> | |
150 <section name="options" title="Other Options" expanded="false"> | |
151 <!--groups--> | |
152 <expand macro="model_validation_common_options"/> | |
153 <expand macro="scoring"/> | |
154 <!--fit_params--> | |
155 <expand macro="pre_dispatch"/> | |
156 </section> | |
157 </when> | |
158 <when value="learning_curve"> | |
159 <expand macro="feature_selection_estimator" /> | |
160 <conditional name="extra_estimator"> | |
161 <expand macro="feature_selection_extra_estimator" /> | |
162 <expand macro="feature_selection_estimator_choices" /> | |
163 </conditional> | |
164 <section name="options" title="Other Options" expanded="false"> | |
165 <!--groups--> | |
166 <expand macro="model_validation_common_options"/> | |
167 <param argument="train_sizes" type="text" value="np.linspace(0.1, 1.0, 5)" label="train_sizes" help="Relative or absolute numbers of training examples that will be used to generate the learning curve"/> | |
168 <expand macro="scoring"/> | |
169 <param argument="exploit_incremental_learning" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="exploit_incremental_learning" help="Whether to apply incremental learning to speed up fitting of the estimator if supported"/> | |
170 <expand macro="pre_dispatch"/> | |
171 <expand macro="shuffle" checked="false" label="shuffle" help="Whether to shuffle training data before taking prefixes"/> | |
172 <expand macro="random_state"/> | |
173 </section> | |
174 <param name="return_type" type="select" label="Select a return type"> | |
175 <option value="train_sizes_abs" selected="true">train_sizes_abs</option> | |
176 <option value="train_scores">train_scores</option> | |
177 <option value="test_scores">test_scores</option> | |
178 </param> | |
179 </when> | |
180 <when value="permutation_test_score"> | |
181 <expand macro="feature_selection_estimator" /> | |
182 <conditional name="extra_estimator"> | |
183 <expand macro="feature_selection_extra_estimator" /> | |
184 <expand macro="feature_selection_estimator_choices" /> | |
185 </conditional> | |
186 <section name="options" title="Other Options" expanded="false"> | |
187 <!--groups--> | |
188 <expand macro="model_validation_common_options"/> | |
189 <expand macro="scoring"/> | |
190 <param name="n_permutations" type="integer" value="100" optional="true" label="n_permutations" help="Number of times to permute y"/> | |
191 <expand macro="random_state"/> | |
192 </section> | |
193 <param name="return_type" type="select" label="Select a return type"> | |
194 <option value="score" selected="true">score</option> | |
195 <option value="permutation_scores">permutation_scores</option> | |
196 <option value="pvalue">pvalue</option> | |
197 </param> | |
198 </when> | |
199 <when value="validation_curve"> | |
200 <expand macro="feature_selection_estimator" /> | |
201 <conditional name="extra_estimator"> | |
202 <expand macro="feature_selection_extra_estimator" /> | |
203 <expand macro="feature_selection_estimator_choices" /> | |
204 </conditional> | |
205 <section name="options" title="Other Options" expanded="false"> | |
206 <param name="param_name" type="text" value="gamma" label="param_name" help="Name of the parameter that will be varied"/> | |
207 <param name="param_range" type="text" value="np.logspace(-6, -1, 5)" label="param_range" help="The values of the parameter that will be evaluated."/> | |
208 <!--groups--> | |
209 <expand macro="model_validation_common_options"/> | |
210 <expand macro="scoring"/> | |
211 <expand macro="pre_dispatch"/> | |
212 </section> | |
213 <param name="return_type" type="select" label="Select a return type"> | |
214 <option value="train_scores" selected="true">train_scores</option> | |
215 <option value="test_scores">test_scores</option> | |
216 </param> | |
217 </when> | |
218 </conditional> | |
219 <expand macro="sl_mixed_input"/> | |
220 </inputs> | |
221 <outputs> | |
222 <data format="tabular" name="outfile"/> | |
223 </outputs> | |
224 <tests> | |
225 <test> | |
226 <param name="selected_function" value="cross_validate"/> | |
227 <param name="estimator" value="linear_model.LassoCV()"/> | |
228 <param name="has_estimator" value="yes"/> | |
229 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> | |
230 <param name="col1" value="1,2,3,4,5"/> | |
231 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> | |
232 <param name="col2" value="6"/> | |
233 <output name="outfile" file="mv_result01.tabular"/> | |
234 </test> | |
235 <test> | |
236 <param name="selected_function" value="cross_val_predict"/> | |
237 <param name="estimator" value="linear_model.LassoCV()"/> | |
238 <param name="has_estimator" value="yes"/> | |
239 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> | |
240 <param name="col1" value="1,2,3,4,5"/> | |
241 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> | |
242 <param name="col2" value="6"/> | |
243 <output name="outfile" file="mv_result02.tabular"/> | |
244 </test> | |
245 <test> | |
246 <param name="selected_function" value="cross_val_score"/> | |
247 <param name="estimator" value="linear_model.LassoCV()"/> | |
248 <param name="has_estimator" value="yes"/> | |
249 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> | |
250 <param name="col1" value="1,2,3,4,5"/> | |
251 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> | |
252 <param name="col2" value="6"/> | |
253 <output name="outfile" file="mv_result03.tabular"/> | |
254 </test> | |
255 <test> | |
256 <param name="selected_function" value="learning_curve"/> | |
257 <param name="estimator" value="linear_model.LassoCV()"/> | |
258 <param name="has_estimator" value="yes"/> | |
259 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | |
260 <param name="header1" value="true" /> | |
261 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> | |
262 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | |
263 <param name="header2" value="true" /> | |
264 <param name="col2" value="1"/> | |
265 <output name="outfile" file="mv_result04.tabular"/> | |
266 </test> | |
267 <test> | |
268 <param name="selected_function" value="permutation_test_score"/> | |
269 <param name="estimator" value="linear_model.LassoCV()"/> | |
270 <param name="has_estimator" value="yes"/> | |
271 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> | |
272 <param name="col1" value="1,2,3,4,5"/> | |
273 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> | |
274 <param name="col2" value="6"/> | |
275 <output name="outfile" file="mv_result05.tabular"/> | |
276 </test> | |
277 <test> | |
278 <param name="selected_function" value="validation_curve"/> | |
279 <param name="estimator" value="svm.SVC(kernel=&quot;linear&quot;)"/> | |
280 <param name="has_estimator" value="yes"/> | |
281 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | |
282 <param name="header1" value="true" /> | |
283 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> | |
284 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | |
285 <param name="header2" value="true" /> | |
286 <param name="col2" value="1"/> | |
287 <param name="return_type" value="test_scores"/> | |
288 <output name="outfile" file="mv_result06.tabular"/> | |
289 </test> | |
290 </tests> | |
291 <help> | |
292 <![CDATA[ | |
293 **What it does** | |
294 This tool provides model validation functions that evaluate estimator performance using cross-validation. It is based on the | |
295 sklearn.model_selection package. | |
296 For information about classification metric functions and their parameter settings please refer to `Scikit-learn classification metrics`_. | |
297 | |
298 .. _`Scikit-learn classification metrics`: http://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics | |
299 ]]> | |
300 </help> | |
301 <expand macro="sklearn_citation"/> | |
302 </tool> |