comparison model_validation.xml @ 2:dd502cb0d567 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 4ed8c4f6ef9ece81797a398b17a99bbaf49a6978
author bgruening
date Wed, 30 May 2018 08:27:01 -0400
parents 333507faecab
children 424d8d21744d
comparison
equal deleted inserted replaced
1:02eadaaa4bf7 2:dd502cb0d567
16 <configfile name="sklearn_model_validation_script"> 16 <configfile name="sklearn_model_validation_script">
17 <![CDATA[ 17 <![CDATA[
18 import sys 18 import sys
19 import json 19 import json
20 import pandas 20 import pandas
21 import ast
21 import pickle 22 import pickle
22 import numpy as np 23 import numpy as np
23 import sklearn.model_selection 24 import sklearn.model_selection
24 from sklearn import svm, linear_model, ensemble 25 from sklearn import svm, linear_model, ensemble
26 from sklearn.pipeline import Pipeline
25 27
26 @COLUMNS_FUNCTION@ 28 @COLUMNS_FUNCTION@
29
30 @FEATURE_SELECTOR_FUNCTION@
27 31
28 input_json_path = sys.argv[1] 32 input_json_path = sys.argv[1]
29 params = json.load(open(input_json_path, "r")) 33 params = json.load(open(input_json_path, "r"))
30 34
31 input_type = params["input_options"]["selected_input"] 35 input_type = params["input_options"]["selected_input"]
49 header=header, 53 header=header,
50 parse_dates=True 54 parse_dates=True
51 ) 55 )
52 y=y.ravel() 56 y=y.ravel()
53 57
54 validator = params["model_validation_functions"]["selected_function"]
55 validator = getattr(sklearn.model_selection, validator)
56 options = params["model_validation_functions"]["options"] 58 options = params["model_validation_functions"]["options"]
57 if 'scoring' in options and options['scoring'] == '': 59 if 'scoring' in options and options['scoring'] == '':
58 options['scoring'] = None 60 options['scoring'] = None
59 61 if 'pre_dispatch' in options and options['pre_dispatch'] == '':
62 options['pre_dispatch'] = None
63
64 pipeline_steps = []
65
66 ## Set up feature selector and add to pipeline steps.
67 if params['feature_selection']['do_feature_selection'] == 'Yes':
68 feature_selector = feature_selector(params['feature_selection']['feature_selection_algorithms'])
69 pipeline_steps.append( ('feature_selector', feature_selector))
70
71 ## Set up estimator and add to pipeline.
60 estimator=params["model_validation_functions"]["estimator"] 72 estimator=params["model_validation_functions"]["estimator"]
61 if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no': 73 if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no':
62 estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"] 74 estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"]
63 estimator = eval(estimator.replace('__dq__', '"').replace("__sq__","'")) 75 estimator = eval(estimator.replace('__dq__', '"').replace("__sq__","'"))
64 76
65 #if $model_validation_functions.selected_function == 'cross_validate': 77 pipeline_steps.append( ('estimator', estimator) )
66 res = validator(estimator, X, y, **options) 78
67 rval = res["$model_validation_functions.return_type"] 79 pipeline = Pipeline(pipeline_steps)
68 80
69 #elif $model_validation_functions.selected_function == 'learning_curve': 81 ## Set up validator, run pipeline through validator and return results.
70 options['train_sizes'] = eval(options['train_sizes']) 82
71 train_sizes_abs, train_scores, test_scores = validator(estimator, X, y, **options) 83 validator = params["model_validation_functions"]["selected_function"]
72 rval = eval("$model_validation_functions.return_type") 84 validator = getattr(sklearn.model_selection, validator)
73 85
74 #elif $model_validation_functions.selected_function == 'permutation_test_score': 86 selected_function = params["model_validation_functions"]["selected_function"]
75 score, permutation_scores, pvalue = validator(estimator, X, y, **options) 87 rval_type = params["model_validation_functions"].get("return_type", None)
76 rval = eval("$model_validation_functions.return_type") 88
77 if "$model_validation_functions.return_type" in ["score", "pvalue"]: 89 if selected_function == 'cross_validate':
78 rval = [rval] 90 res = validator(pipeline, X, y, **options)
79 91 rval = res[rval_type]
80 #elif $model_validation_functions.selected_function == 'validation_curve': 92 elif selected_function == 'learning_curve':
81 options['param_range'] = eval(options['param_range']) 93 options['train_sizes'] = eval(options['train_sizes'])
82 train_scores, test_scores = validator(estimator, X, y, **options) 94 train_sizes_abs, train_scores, test_scores = validator(pipeline, X, y, **options)
83 rval = eval("$model_validation_functions.return_type") 95 rval = eval(rval_type)
84 96 elif selected_function == 'permutation_test_score':
85 #else: 97 score, permutation_scores, pvalue = validator(pipeline, X, y, **options)
86 rval = validator(estimator, X, y, **options) 98 rval = eval(rval_type)
87 #end if 99 if rval_type in ["score", "pvalue"]:
100 rval = [rval]
101 elif selected_function == 'validation_curve':
102 options['param_name'] = 'estimator__' + options['param_name']
103 options['param_range'] = eval(options['param_range'])
104 train_scores, test_scores = validator(pipeline, X, y, **options)
105 rval = eval(rval_type)
106 elif selected_function == 'GridSearchCV':
107 param_grid = params["model_validation_functions"]["param_grid"].replace("__sq__","'")\
108 .replace('__dq__','"').replace("__oc__", "{").replace("__cc__", "}")\
109 .replace("__ob__", "[").replace("__cb__", "]")
110 param_grid = ast.literal_eval(param_grid)
111 grid = validator(pipeline, param_grid, **options)
112 grid.fit(X, y)
113 rval = getattr(grid, rval_type)
114 if rval_type in ["best_estimator_", "best_score_", "best_index_"]:
115 rval = [rval]
116 else:
117 rval = validator(pipeline, X, y, **options)
88 118
89 rval = pandas.DataFrame(rval) 119 rval = pandas.DataFrame(rval)
90 rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False) 120 if rval_type and rval_type == "cv_results_":
121 rval.to_csv(path_or_buf="$outfile", sep='\t', header=True, index=False)
122 else:
123 rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False)
91 124
92 ]]> 125 ]]>
93 </configfile> 126 </configfile>
94 </configfiles> 127 </configfiles>
95 <inputs> 128 <inputs>
129 <conditional name="feature_selection">
130 <param name="do_feature_selection" type="select" label="Do feature selection?">
131 <option value="No" selected="true"/>
132 <option value="Yes"/>
133 </param>
134 <when value="No"/>
135 <when value="Yes">
136 <expand macro="feature_selection_all"/>
137 </when>
138 </conditional>
96 <conditional name="model_validation_functions"> 139 <conditional name="model_validation_functions">
97 <param name="selected_function" type="select" label="Select a model validation function"> 140 <param name="selected_function" type="select" label="Select a model validation function">
141 <option value="GridSearchCV">GridSearchCV - Exhaustive search over specified parameter values for an estimator </option>
98 <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option> 142 <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option>
99 <option value="cross_val_predict">cross_val_predict - Generate cross-validated estimates for each input data point</option> 143 <option value="cross_val_predict">cross_val_predict - Generate cross-validated estimates for each input data point</option>
100 <option value="cross_val_score">cross_val_score - Evaluate a score by cross-validation</option> 144 <option value="cross_val_score">cross_val_score - Evaluate a score by cross-validation</option>
101 <option value="learning_curve">learning_curve - Learning curve</option> 145 <option value="learning_curve">learning_curve - Learning curve</option>
102 <option value="permutation_test_score">permutation_test_score - Evaluate the significance of a cross-validated score with permutations</option> 146 <option value="permutation_test_score">permutation_test_score - Evaluate the significance of a cross-validated score with permutations</option>
103 <option value="validation_curve">validation_curve - Validation curve</option> 147 <option value="validation_curve">validation_curve - Validation curve</option>
104 </param> 148 </param>
149 <when value="GridSearchCV">
150 <expand macro="estimator_input_no_fit" />
151 <param argument="param_grid" type="text" value="[{'feature_selector__k': [3, 5, 7, 9], 'estimator__C': [1, 10, 100, 1000]}]" label="param_grid" help="Dictionary with parameter names (string) as keys and lists of parameter settings to try as values, or a list of such dictionaries, in which case the grids spanned by each dictionary in the list are explored"/>
152 <section name="options" title="Other Options" expanded="false">
153 <expand macro="scoring"/>
154 <expand macro="model_validation_common_options"/>
155 <expand macro="pre_dispatch" value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/>
156 <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="Data is identically distributed?"/>
157 <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset."/>
158 <!--error_score-->
159 <param argument="return_train_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="return_train_score" help=""/>
160 </section>
161 <param name="return_type" type="select" label="Select a return type">
162 <option value="cv_results_" selected="true">cv_results_</option>
163 <option value="best_estimator_">best_estimator_</option>
164 <option value="best_score_">best_score_</option>
165 <option value="best_params_">best_params_</option>
166 <option value="best_index_">best_index_</option>
167 </param>
168 </when>
105 <when value="cross_validate"> 169 <when value="cross_validate">
106 <expand macro="feature_selection_estimator" /> 170 <expand macro="estimator_input_no_fit" />
107 <conditional name="extra_estimator">
108 <expand macro="feature_selection_extra_estimator" />
109 <expand macro="feature_selection_estimator_choices" />
110 </conditional>
111 <section name="options" title="Other Options" expanded="false"> 171 <section name="options" title="Other Options" expanded="false">
112 <!--groups--> 172 <!--groups-->
113 <expand macro="model_validation_common_options"/> 173 <expand macro="model_validation_common_options"/>
114 <expand macro="scoring"/> 174 <expand macro="scoring"/>
115 <!--fit_params--> 175 <!--fit_params-->
121 <option value="fit_time">fit_time</option> 181 <option value="fit_time">fit_time</option>
122 <option value="score_time">score_time</option> 182 <option value="score_time">score_time</option>
123 </param> 183 </param>
124 </when> 184 </when>
125 <when value="cross_val_predict"> 185 <when value="cross_val_predict">
126 <expand macro="feature_selection_estimator" /> 186 <expand macro="estimator_input_no_fit" />
127 <conditional name="extra_estimator"> 187 <section name="options" title="Other Options" expanded="false">
128 <expand macro="feature_selection_extra_estimator" /> 188 <!--groups-->
129 <expand macro="feature_selection_estimator_choices" /> 189 <expand macro="model_validation_common_options" />
130 </conditional>
131 <section name="options" title="Other Options" expanded="false">
132 <!--groups-->
133 <param argument="cv" type="integer" value="" optional="true" label="cv" help="The number of folds in a (Stratified)KFold" />
134 <expand macro="n_jobs"/>
135 <expand macro="verbose"/>
136 <!--fit_params--> 190 <!--fit_params-->
137 <param argument="pre_dispatch" type="integer" value="" optional="true" label="pre_dispatch" help="Controls the number of jobs that get dispatched during parallel execution" /> 191 <expand macro="pre_dispatch" value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/>
138 <param argument="method" type="select" label="Invokes the passed method name of the passed estimator"> 192 <param argument="method" type="select" label="Invokes the passed method name of the passed estimator">
139 <option value="predict" selected="true">predict</option> 193 <option value="predict" selected="true">predict</option>
140 <option value="predict_proba">predict_proba</option> 194 <option value="predict_proba">predict_proba</option>
141 </param> 195 </param>
142 </section> 196 </section>
143 </when> 197 </when>
144 <when value="cross_val_score"> 198 <when value="cross_val_score">
145 <expand macro="feature_selection_estimator" /> 199 <expand macro="estimator_input_no_fit" />
146 <conditional name="extra_estimator">
147 <expand macro="feature_selection_extra_estimator" />
148 <expand macro="feature_selection_estimator_choices" />
149 </conditional>
150 <section name="options" title="Other Options" expanded="false"> 200 <section name="options" title="Other Options" expanded="false">
151 <!--groups--> 201 <!--groups-->
152 <expand macro="model_validation_common_options"/> 202 <expand macro="model_validation_common_options"/>
153 <expand macro="scoring"/> 203 <expand macro="scoring"/>
154 <!--fit_params--> 204 <!--fit_params-->
155 <expand macro="pre_dispatch"/> 205 <expand macro="pre_dispatch"/>
156 </section> 206 </section>
157 </when> 207 </when>
158 <when value="learning_curve"> 208 <when value="learning_curve">
159 <expand macro="feature_selection_estimator" /> 209 <expand macro="estimator_input_no_fit" />
160 <conditional name="extra_estimator">
161 <expand macro="feature_selection_extra_estimator" />
162 <expand macro="feature_selection_estimator_choices" />
163 </conditional>
164 <section name="options" title="Other Options" expanded="false"> 210 <section name="options" title="Other Options" expanded="false">
165 <!--groups--> 211 <!--groups-->
166 <expand macro="model_validation_common_options"/> 212 <expand macro="model_validation_common_options"/>
167 <param argument="train_sizes" type="text" value="np.linspace(0.1, 1.0, 5)" label="train_sizes" help="Relative or absolute numbers of training examples that will be used to generate the learning curve"/> 213 <param argument="train_sizes" type="text" value="np.linspace(0.1, 1.0, 5)" label="train_sizes" help="Relative or absolute numbers of training examples that will be used to generate the learning curve"/>
168 <expand macro="scoring"/> 214 <expand macro="scoring"/>
176 <option value="train_scores">train_scores</option> 222 <option value="train_scores">train_scores</option>
177 <option value="test_scores">test_scores</option> 223 <option value="test_scores">test_scores</option>
178 </param> 224 </param>
179 </when> 225 </when>
180 <when value="permutation_test_score"> 226 <when value="permutation_test_score">
181 <expand macro="feature_selection_estimator" /> 227 <expand macro="estimator_input_no_fit" />
182 <conditional name="extra_estimator">
183 <expand macro="feature_selection_extra_estimator" />
184 <expand macro="feature_selection_estimator_choices" />
185 </conditional>
186 <section name="options" title="Other Options" expanded="false"> 228 <section name="options" title="Other Options" expanded="false">
187 <!--groups--> 229 <!--groups-->
188 <expand macro="model_validation_common_options"/> 230 <expand macro="model_validation_common_options"/>
189 <expand macro="scoring"/> 231 <expand macro="scoring"/>
190 <param name="n_permutations" type="integer" value="100" optional="true" label="n_permutations" help="Number of times to permute y"/> 232 <param name="n_permutations" type="integer" value="100" optional="true" label="n_permutations" help="Number of times to permute y"/>
195 <option value="permutation_scores">permutation_scores</option> 237 <option value="permutation_scores">permutation_scores</option>
196 <option value="pvalue">pvalue</option> 238 <option value="pvalue">pvalue</option>
197 </param> 239 </param>
198 </when> 240 </when>
199 <when value="validation_curve"> 241 <when value="validation_curve">
200 <expand macro="feature_selection_estimator" /> 242 <expand macro="estimator_input_no_fit" />
201 <conditional name="extra_estimator">
202 <expand macro="feature_selection_extra_estimator" />
203 <expand macro="feature_selection_estimator_choices" />
204 </conditional>
205 <section name="options" title="Other Options" expanded="false"> 243 <section name="options" title="Other Options" expanded="false">
206 <param name="param_name" type="text" value="gamma" label="param_name" help="Name of the parameter that will be varied"/> 244 <param name="param_name" type="text" value="gamma" label="param_name" help="Name of the parameter that will be varied"/>
207 <param name="param_range" type="text" value="np.logspace(-6, -1, 5)" label="param_range" help="The values of the parameter that will be evaluated."/> 245 <param name="param_range" type="text" value="np.logspace(-6, -1, 5)" label="param_range" help="The values of the parameter that will be evaluated."/>
208 <!--groups--> 246 <!--groups-->
209 <expand macro="model_validation_common_options"/> 247 <expand macro="model_validation_common_options"/>
285 <param name="header2" value="true" /> 323 <param name="header2" value="true" />
286 <param name="col2" value="1"/> 324 <param name="col2" value="1"/>
287 <param name="return_type" value="test_scores"/> 325 <param name="return_type" value="test_scores"/>
288 <output name="outfile" file="mv_result06.tabular"/> 326 <output name="outfile" file="mv_result06.tabular"/>
289 </test> 327 </test>
328 <test>
329 <param name="do_feature_selection" value="Yes"/>
330 <param name="selected_algorithm" value="SelectKBest"/>
331 <param name="score_func" value="chi2"/>
332 <param name="selected_function" value="GridSearchCV"/>
333 <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/>
334 <param name="has_estimator" value="yes"/>
335 <param name="param_grid" value="[{'feature_selector__k': [3, 7], 'estimator__C': [1, 100]}]"/>
336 <param name="return_type" value="best_score_"/>
337 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
338 <param name="header1" value="true" />
339 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
340 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
341 <param name="header2" value="true" />
342 <param name="col2" value="1"/>
343 <output name="outfile" file="mv_result07.tabular"/>
344 </test>
290 </tests> 345 </tests>
291 <help> 346 <help>
292 <![CDATA[ 347 <![CDATA[
293 **What it does** 348 **What it does**
294 This tool includes model validation functions to evaluate estimator performance in the cross-validation approach. This tool is based on 349 This tool includes model validation functions to evaluate estimator performance in the cross-validation approach. This tool is based on