comparison model_validation.xml @ 0:333507faecab draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2e1e78576b38110cf5b1f2ed83b08b9c3a6cbfee
author bgruening
date Sat, 28 Apr 2018 18:10:26 -0400
parents
children dd502cb0d567
comparison
equal deleted inserted replaced
-1:000000000000 0:333507faecab
1 <tool id="sklearn_model_validation" name="Model Validation" version="@VERSION@">
<!-- One-line summary shown next to the tool name. -->
<description>evaluates estimator performance by cross-validation</description>
<!-- Shared macro and token definitions live in the imported macro file. -->
<macros>
    <import>main_macros.xml</import>
</macros>
<!-- Requirements and stdio handling are expanded from the imported macros. -->
<expand macro="python_requirements"/>
<expand macro="macro_stdio"/>
<version_command>echo "@VERSION@"</version_command>
<!-- Runs the generated config-file script; its only argument is the JSON dump of the tool inputs. -->
<command>
<![CDATA[
    python "$sklearn_model_validation_script" '$inputs'
]]>
</command>
<configfiles>
    <!-- JSON dump of all tool parameters; its path is passed to the script as argv[1]. -->
    <inputs name="inputs" />
    <!-- Cheetah-templated Python script. Lines starting with two hash signs are Cheetah comments
         and are stripped before the script is written to disk. -->
    <configfile name="sklearn_model_validation_script">
<![CDATA[
## Purpose: load X and y, build the estimator, run the selected
## sklearn.model_selection function and write the chosen result to the
## output dataset as a header-less, tab-separated table.
import sys
import json
import pandas
import pickle
import numpy as np
import sklearn.model_selection
from scipy.io import mmread
from sklearn import svm, linear_model, ensemble

@COLUMNS_FUNCTION@

## Tool parameters as dumped by Galaxy; the file is closed as soon as it is parsed.
input_json_path = sys.argv[1]
with open(input_json_path, "r") as param_handler:
    params = json.load(param_handler)

## Feature matrix: selected columns of a tabular file, or a Matrix Market file otherwise.
input_type = params["input_options"]["selected_input"]
if input_type=="tabular":
    header = 'infer' if params["input_options"]["header1"] else None
    X = read_columns(
        "$input_options.infile1",
        "$input_options.col1",
        sep='\t',
        header=header,
        parse_dates=True
    )
else:
    ## mmread opens and closes the file itself when it is given a path.
    X = mmread("$input_options.infile1")

## Target values, flattened to one dimension.
header = 'infer' if params["input_options"]["header2"] else None
y = read_columns(
    "$input_options.infile2",
    "$input_options.col2",
    sep='\t',
    header=header,
    parse_dates=True
)
y=y.ravel()

## Resolve the validation function by name and collect its keyword arguments.
validator = params["model_validation_functions"]["selected_function"]
validator = getattr(sklearn.model_selection, validator)
options = params["model_validation_functions"]["options"]
## An empty scoring field means: use the default scorer of the estimator.
if 'scoring' in options and options['scoring'] == '':
    options['scoring'] = None

estimator=params["model_validation_functions"]["estimator"]
if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no':
    estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"]
## The placeholders __dq__ and __sq__ are turned back into quote characters before evaluation.
## NOTE(review): eval executes user-supplied text as Python code; this is by design here,
## but it should be replaced by a whitelist if the tool is exposed to untrusted users.
estimator = eval(estimator.replace('__dq__', '"').replace("__sq__","'"))

#if $model_validation_functions.selected_function == 'cross_validate':
## cross_validate returns a dict; keep only the requested entry.
res = validator(estimator, X, y, **options)
rval = res["$model_validation_functions.return_type"]

#elif $model_validation_functions.selected_function == 'learning_curve':
## NOTE(review): train_sizes is a user-supplied Python expression and is evaluated as code.
options['train_sizes'] = eval(options['train_sizes'])
train_sizes_abs, train_scores, test_scores = validator(estimator, X, y, **options)
## Pick the requested result by name with a plain lookup instead of eval.
results = {
    "train_sizes_abs": train_sizes_abs,
    "train_scores": train_scores,
    "test_scores": test_scores,
}
rval = results["$model_validation_functions.return_type"]

#elif $model_validation_functions.selected_function == 'permutation_test_score':
score, permutation_scores, pvalue = validator(estimator, X, y, **options)
## Scalar results are wrapped in a list so that they can be turned into a DataFrame below.
results = {
    "score": [score],
    "permutation_scores": permutation_scores,
    "pvalue": [pvalue],
}
rval = results["$model_validation_functions.return_type"]

#elif $model_validation_functions.selected_function == 'validation_curve':
## NOTE(review): param_range is a user-supplied Python expression and is evaluated as code.
options['param_range'] = eval(options['param_range'])
train_scores, test_scores = validator(estimator, X, y, **options)
results = {
    "train_scores": train_scores,
    "test_scores": test_scores,
}
rval = results["$model_validation_functions.return_type"]

#else:
## cross_val_predict and cross_val_score return the array to report directly.
rval = validator(estimator, X, y, **options)
#end if

rval = pandas.DataFrame(rval)
rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False)

]]>
    </configfile>
</configfiles>
<inputs>
    <!-- Every branch offers the same estimator selection; the option section and the
         selectable return value depend on the chosen validation function. -->
    <conditional name="model_validation_functions">
        <param name="selected_function"
               type="select"
               label="Select a model validation function">
            <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option>
            <option value="cross_val_predict">cross_val_predict - Generate cross-validated estimates for each input data point</option>
            <option value="cross_val_score">cross_val_score - Evaluate a score by cross-validation</option>
            <option value="learning_curve">learning_curve - Learning curve</option>
            <option value="permutation_test_score">permutation_test_score - Evaluate the significance of a cross-validated score with permutations</option>
            <option value="validation_curve">validation_curve - Validation curve</option>
        </param>

        <!-- cross_validate: one entry of the returned result is written out. -->
        <when value="cross_validate">
            <expand macro="feature_selection_estimator"/>
            <conditional name="extra_estimator">
                <expand macro="feature_selection_extra_estimator"/>
                <expand macro="feature_selection_estimator_choices"/>
            </conditional>
            <section name="options" title="Other Options" expanded="false">
                <!-- groups: placeholder, no input is defined for it here -->
                <expand macro="model_validation_common_options"/>
                <expand macro="scoring"/>
                <!-- fit_params: placeholder, no input is defined for it here -->
                <expand macro="pre_dispatch"/>
            </section>
            <param name="return_type" type="select" label="Select a return type">
                <option value="test_score" selected="true">test_score</option>
                <option value="train_score">train_score</option>
                <option value="fit_time">fit_time</option>
                <option value="score_time">score_time</option>
            </param>
        </when>

        <!-- cross_val_predict: options are declared inline and there is no return type selector. -->
        <when value="cross_val_predict">
            <expand macro="feature_selection_estimator"/>
            <conditional name="extra_estimator">
                <expand macro="feature_selection_extra_estimator"/>
                <expand macro="feature_selection_estimator_choices"/>
            </conditional>
            <section name="options" title="Other Options" expanded="false">
                <!-- groups: placeholder, no input is defined for it here -->
                <param argument="cv"
                       type="integer"
                       value=""
                       optional="true"
                       label="cv"
                       help="The number of folds in a (Stratified)KFold"/>
                <expand macro="n_jobs"/>
                <expand macro="verbose"/>
                <!-- fit_params: placeholder, no input is defined for it here -->
                <param argument="pre_dispatch"
                       type="integer"
                       value=""
                       optional="true"
                       label="pre_dispatch"
                       help="Controls the number of jobs that get dispatched during parallel execution"/>
                <param argument="method"
                       type="select"
                       label="Invokes the passed method name of the passed estimator">
                    <option value="predict" selected="true">predict</option>
                    <option value="predict_proba">predict_proba</option>
                </param>
            </section>
        </when>

        <!-- cross_val_score: no return type selector. -->
        <when value="cross_val_score">
            <expand macro="feature_selection_estimator"/>
            <conditional name="extra_estimator">
                <expand macro="feature_selection_extra_estimator"/>
                <expand macro="feature_selection_estimator_choices"/>
            </conditional>
            <section name="options" title="Other Options" expanded="false">
                <!-- groups: placeholder, no input is defined for it here -->
                <expand macro="model_validation_common_options"/>
                <expand macro="scoring"/>
                <!-- fit_params: placeholder, no input is defined for it here -->
                <expand macro="pre_dispatch"/>
            </section>
        </when>

        <!-- learning_curve: train_sizes is free text that the script evaluates as a Python expression. -->
        <when value="learning_curve">
            <expand macro="feature_selection_estimator"/>
            <conditional name="extra_estimator">
                <expand macro="feature_selection_extra_estimator"/>
                <expand macro="feature_selection_estimator_choices"/>
            </conditional>
            <section name="options" title="Other Options" expanded="false">
                <!-- groups: placeholder, no input is defined for it here -->
                <expand macro="model_validation_common_options"/>
                <param argument="train_sizes"
                       type="text"
                       value="np.linspace(0.1, 1.0, 5)"
                       label="train_sizes"
                       help="Relative or absolute numbers of training examples that will be used to generate the learning curve"/>
                <expand macro="scoring"/>
                <param argument="exploit_incremental_learning"
                       type="boolean"
                       optional="true"
                       truevalue="booltrue"
                       falsevalue="boolfalse"
                       checked="false"
                       label="exploit_incremental_learning"
                       help="Whether to apply incremental learning to speed up fitting of the estimator if supported"/>
                <expand macro="pre_dispatch"/>
                <expand macro="shuffle"
                        checked="false"
                        label="shuffle"
                        help="Whether to shuffle training data before taking prefixes"/>
                <expand macro="random_state"/>
            </section>
            <param name="return_type" type="select" label="Select a return type">
                <option value="train_sizes_abs" selected="true">train_sizes_abs</option>
                <option value="train_scores">train_scores</option>
                <option value="test_scores">test_scores</option>
            </param>
        </when>

        <!-- permutation_test_score -->
        <when value="permutation_test_score">
            <expand macro="feature_selection_estimator"/>
            <conditional name="extra_estimator">
                <expand macro="feature_selection_extra_estimator"/>
                <expand macro="feature_selection_estimator_choices"/>
            </conditional>
            <section name="options" title="Other Options" expanded="false">
                <!-- groups: placeholder, no input is defined for it here -->
                <expand macro="model_validation_common_options"/>
                <expand macro="scoring"/>
                <param name="n_permutations"
                       type="integer"
                       value="100"
                       optional="true"
                       label="n_permutations"
                       help="Number of times to permute y"/>
                <expand macro="random_state"/>
            </section>
            <param name="return_type" type="select" label="Select a return type">
                <option value="score" selected="true">score</option>
                <option value="permutation_scores">permutation_scores</option>
                <option value="pvalue">pvalue</option>
            </param>
        </when>

        <!-- validation_curve: param_range is free text that the script evaluates as a Python expression. -->
        <when value="validation_curve">
            <expand macro="feature_selection_estimator"/>
            <conditional name="extra_estimator">
                <expand macro="feature_selection_extra_estimator"/>
                <expand macro="feature_selection_estimator_choices"/>
            </conditional>
            <section name="options" title="Other Options" expanded="false">
                <param name="param_name"
                       type="text"
                       value="gamma"
                       label="param_name"
                       help="Name of the parameter that will be varied"/>
                <param name="param_range"
                       type="text"
                       value="np.logspace(-6, -1, 5)"
                       label="param_range"
                       help="The values of the parameter that will be evaluated."/>
                <!-- groups: placeholder, no input is defined for it here -->
                <expand macro="model_validation_common_options"/>
                <expand macro="scoring"/>
                <expand macro="pre_dispatch"/>
            </section>
            <param name="return_type" type="select" label="Select a return type">
                <option value="train_scores" selected="true">train_scores</option>
                <option value="test_scores">test_scores</option>
            </param>
        </when>
    </conditional>
    <!-- Input datasets for the feature matrix and the target column. -->
    <expand macro="sl_mixed_input"/>
</inputs>
<!-- Single tabular result; its content depends on the selected function and return type. -->
<outputs>
    <data format="tabular" name="outfile"/>
</outputs>
<!-- One functional test per validation function, each compared against a stored result file. -->
<tests>
    <!-- cross_validate with the default return type -->
    <test>
        <param name="selected_function" value="cross_validate"/>
        <param name="estimator" value="linear_model.LassoCV()"/>
        <param name="has_estimator" value="yes"/>
        <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
        <param name="col1" value="1,2,3,4,5"/>
        <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
        <param name="col2" value="6"/>
        <output name="outfile" file="mv_result01.tabular"/>
    </test>
    <!-- cross_val_predict -->
    <test>
        <param name="selected_function" value="cross_val_predict"/>
        <param name="estimator" value="linear_model.LassoCV()"/>
        <param name="has_estimator" value="yes"/>
        <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
        <param name="col1" value="1,2,3,4,5"/>
        <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
        <param name="col2" value="6"/>
        <output name="outfile" file="mv_result02.tabular"/>
    </test>
    <!-- cross_val_score -->
    <test>
        <param name="selected_function" value="cross_val_score"/>
        <param name="estimator" value="linear_model.LassoCV()"/>
        <param name="has_estimator" value="yes"/>
        <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
        <param name="col1" value="1,2,3,4,5"/>
        <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
        <param name="col2" value="6"/>
        <output name="outfile" file="mv_result03.tabular"/>
    </test>
    <!-- learning_curve on separate X and y files that both have a header row -->
    <test>
        <param name="selected_function" value="learning_curve"/>
        <param name="estimator" value="linear_model.LassoCV()"/>
        <param name="has_estimator" value="yes"/>
        <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
        <param name="header1" value="true"/>
        <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
        <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
        <param name="header2" value="true"/>
        <param name="col2" value="1"/>
        <output name="outfile" file="mv_result04.tabular"/>
    </test>
    <!-- permutation_test_score with the default return type -->
    <test>
        <param name="selected_function" value="permutation_test_score"/>
        <param name="estimator" value="linear_model.LassoCV()"/>
        <param name="has_estimator" value="yes"/>
        <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
        <param name="col1" value="1,2,3,4,5"/>
        <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
        <param name="col2" value="6"/>
        <output name="outfile" file="mv_result05.tabular"/>
    </test>
    <!-- validation_curve returning test_scores -->
    <test>
        <param name="selected_function" value="validation_curve"/>
        <param name="estimator" value="svm.SVC(kernel=&quot;linear&quot;)"/>
        <param name="has_estimator" value="yes"/>
        <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
        <param name="header1" value="true"/>
        <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
        <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
        <param name="header2" value="true"/>
        <param name="col2" value="1"/>
        <param name="return_type" value="test_scores"/>
        <output name="outfile" file="mv_result06.tabular"/>
    </test>
</tests>
291 <help>
292 <![CDATA[
293 **What it does**
294 This tool provides model validation functions that evaluate estimator performance by cross-validation. It is based on the
295 sklearn.model_selection package.
296 For information about classification metric functions and their parameter settings please refer to `Scikit-learn classification metrics`_.
297
298 .. _`Scikit-learn classification metrics`: http://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics
299 ]]>
300 </help>
301 <expand macro="sklearn_citation"/>
302 </tool>