comparison ensemble.xml @ 41:6546d7c9f08b draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 12:52:25 +0000
parents 19d6c2745d34
children
comparison
equal deleted inserted replaced
40:a07ab242b0b5 41:6546d7c9f08b
1 <tool id="sklearn_ensemble" name="Ensemble methods" version="@VERSION@" profile="20.05"> 1 <tool id="sklearn_ensemble" name="Ensemble methods" version="@VERSION@" profile="@PROFILE@">
2 <description>for classification and regression</description> 2 <description>for classification and regression</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="python_requirements" /> 6 <expand macro="python_requirements" />
15 <configfile name="ensemble_script"> 15 <configfile name="ensemble_script">
16 <![CDATA[ 16 <![CDATA[
17 import json 17 import json
18 import numpy as np 18 import numpy as np
19 import pandas 19 import pandas
20 import pickle
21 import sys 20 import sys
22 21
23 from scipy.io import mmread 22 from scipy.io import mmread
24 import sklearn.ensemble 23 import sklearn.ensemble
25 from galaxy_ml.utils import load_model, get_X_y 24 from galaxy_ml.model_persist import dump_model_to_h5, load_model_from_h5
25 from galaxy_ml.utils import clean_params, get_X_y
26 26
27 27
28 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) 28 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
29 29
30 # Get inputs, outputs. 30 # Get inputs, outputs.
55 if options["select_max_features"]["max_features"] == "number_input": 55 if options["select_max_features"]["max_features"] == "number_input":
56 options["select_max_features"]["max_features"] = options["select_max_features"]["num_max_features"] 56 options["select_max_features"]["max_features"] = options["select_max_features"]["num_max_features"]
57 options["select_max_features"].pop("num_max_features") 57 options["select_max_features"].pop("num_max_features")
58 options["max_features"] = options["select_max_features"]["max_features"] 58 options["max_features"] = options["select_max_features"]["max_features"]
59 options.pop("select_max_features") 59 options.pop("select_max_features")
60 if "presort" in options:
61 if options["presort"] == "true":
62 options["presort"] = True
63 if options["presort"] == "false":
64 options["presort"] = False
65 if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0: 60 if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0:
66 options["min_samples_leaf"] = 1 61 options["min_samples_leaf"] = 1
67 if "min_samples_split" in options and options["min_samples_split"] > 1.0: 62 if "min_samples_split" in options and options["min_samples_split"] > 1.0:
68 options["min_samples_split"] = int(options["min_samples_split"]) 63 options["min_samples_split"] = int(options["min_samples_split"])
69 64
70 X, y = get_X_y(params, infile1, infile2) 65 X, y = get_X_y(params, infile1, infile2)
71 66
72 my_class = getattr(sklearn.ensemble, algorithm) 67 my_class = getattr(sklearn.ensemble, algorithm)
73 estimator = my_class(**options) 68 estimator = my_class(**options)
74 estimator.fit(X,y) 69 estimator.fit(X,y)
75 with open(outfile_fit, 'wb') as out_handler: 70 dump_model_to_h5(estimator, outfile_fit)
76 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL)
77 71
78 else: 72 else:
79 with open(infile_model, 'rb') as model_handler: 73 classifier_object = load_model_from_h5(infile_model)
80 classifier_object = load_model(model_handler) 74 classifier_object = clean_params(classifier_object)
81 header = 'infer' if params["selected_tasks"]["header"] else None 75 header = 'infer' if params["selected_tasks"]["header"] else None
82 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None) 76 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None)
83 prediction = classifier_object.predict(data) 77 prediction = classifier_object.predict(data)
84 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) 78 prediction_df = pandas.DataFrame(prediction, columns=["predicted"])
85 res = pandas.concat([data, prediction_df], axis=1) 79 res = pandas.concat([data, prediction_df], axis=1)
87 81
88 ]]> 82 ]]>
89 </configfile> 83 </configfile>
90 </configfiles> 84 </configfiles>
91 <inputs> 85 <inputs>
92 <expand macro="sl_Conditional" model="zip"> 86 <expand macro="sl_Conditional" model="h5mlm">
93 <param name="selected_algorithm" type="select" label="Select an ensemble method:"> 87 <param name="selected_algorithm" type="select" label="Select an ensemble method:">
94 <option value="RandomForestClassifier" selected="true">Random forest classifier</option> 88 <option value="RandomForestClassifier" selected="true">Random forest classifier</option>
95 <option value="AdaBoostClassifier">Ada boost classifier</option> 89 <option value="AdaBoostClassifier">Ada boost classifier</option>
96 <option value="GradientBoostingClassifier">Gradient Boosting Classifier</option> 90 <option value="GradientBoostingClassifier">Gradient Boosting Classifier</option>
97 <option value="RandomForestRegressor">Random forest regressor</option> 91 <option value="RandomForestRegressor">Random forest regressor</option>
151 <expand macro="max_leaf_nodes" /> 145 <expand macro="max_leaf_nodes" />
152 <expand macro="min_impurity_decrease" /> 146 <expand macro="min_impurity_decrease" />
153 <expand macro="verbose" /> 147 <expand macro="verbose" />
154 <expand macro="warm_start" checked="false" /> 148 <expand macro="warm_start" checked="false" />
155 <expand macro="random_state" /> 149 <expand macro="random_state" />
156 <expand macro="presort" />
157 </section> 150 </section>
158 </when> 151 </when>
159 <when value="RandomForestRegressor"> 152 <when value="RandomForestRegressor">
160 <expand macro="sl_mixed_input" /> 153 <expand macro="sl_mixed_input" />
161 <section name="options" title="Advanced Options" expanded="False"> 154 <section name="options" title="Advanced Options" expanded="False">
214 <param argument="alpha" type="float" value="0.9" label="alpha" help="The alpha-quantile of the huber loss function and the quantile loss function" /> 207 <param argument="alpha" type="float" value="0.9" label="alpha" help="The alpha-quantile of the huber loss function and the quantile loss function" />
215 <!--base_estimator=None--> 208 <!--base_estimator=None-->
216 <expand macro="verbose" /> 209 <expand macro="verbose" />
217 <expand macro="warm_start" checked="false" /> 210 <expand macro="warm_start" checked="false" />
218 <expand macro="random_state" /> 211 <expand macro="random_state" />
219 <expand macro="presort" />
220 </section> 212 </section>
221 </when> 213 </when>
222 </expand> 214 </expand>
223 </inputs> 215 </inputs>
224 216
234 <param name="selected_algorithm" value="RandomForestClassifier" /> 226 <param name="selected_algorithm" value="RandomForestClassifier" />
235 <param name="random_state" value="10" /> 227 <param name="random_state" value="10" />
236 <output name="outfile_fit" file="rfc_model01" compare="sim_size" delta="5" /> 228 <output name="outfile_fit" file="rfc_model01" compare="sim_size" delta="5" />
237 </test> 229 </test>
238 <test> 230 <test>
239 <param name="infile_model" value="rfc_model01" ftype="zip" /> 231 <param name="infile_model" value="rfc_model01" ftype="h5mlm" />
240 <param name="infile_data" value="test.tabular" ftype="tabular" /> 232 <param name="infile_data" value="test.tabular" ftype="tabular" />
241 <param name="selected_task" value="load" /> 233 <param name="selected_task" value="load" />
242 <output name="outfile_predict" file="rfc_result01" /> 234 <output name="outfile_predict" file="rfc_result01" />
243 </test> 235 </test>
244 <test> 236 <test>
250 <param name="selected_algorithm" value="RandomForestRegressor" /> 242 <param name="selected_algorithm" value="RandomForestRegressor" />
251 <param name="random_state" value="10" /> 243 <param name="random_state" value="10" />
252 <output name="outfile_fit" file="rfr_model01" compare="sim_size" delta="5" /> 244 <output name="outfile_fit" file="rfr_model01" compare="sim_size" delta="5" />
253 </test> 245 </test>
254 <test> 246 <test>
255 <param name="infile_model" value="rfr_model01" ftype="zip" /> 247 <param name="infile_model" value="rfr_model01" ftype="h5mlm" />
256 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> 248 <param name="infile_data" value="regression_test.tabular" ftype="tabular" />
257 <param name="selected_task" value="load" /> 249 <param name="selected_task" value="load" />
258 <output name="outfile_predict" file="rfr_result01" /> 250 <output name="outfile_predict" file="rfr_result01" />
259 </test> 251 </test>
260 <test> 252 <test>
270 <param name="num_max_features" value="0.5" /> 262 <param name="num_max_features" value="0.5" />
271 <param name="random_state" value="42" /> 263 <param name="random_state" value="42" />
272 <output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="5" /> 264 <output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="5" />
273 </test> 265 </test>
274 <test> 266 <test>
275 <param name="infile_model" value="gbr_model01" ftype="zip" /> 267 <param name="infile_model" value="gbr_model01" ftype="h5mlm" />
276 <param name="infile_data" value="regression_test_X.tabular" ftype="tabular" /> 268 <param name="infile_data" value="regression_test_X.tabular" ftype="tabular" />
277 <param name="selected_task" value="load" /> 269 <param name="selected_task" value="load" />
278 <param name="header" value="True" /> 270 <param name="header" value="True" />
279 <output name="outfile_predict" file="gbr_prediction_result01.tabular" /> 271 <output name="outfile_predict" file="gbr_prediction_result01.tabular" />
280 </test> 272 </test>
286 <param name="selected_task" value="train" /> 278 <param name="selected_task" value="train" />
287 <param name="selected_algorithm" value="GradientBoostingClassifier" /> 279 <param name="selected_algorithm" value="GradientBoostingClassifier" />
288 <output name="outfile_fit" file="gbc_model01" compare="sim_size" delta="5" /> 280 <output name="outfile_fit" file="gbc_model01" compare="sim_size" delta="5" />
289 </test> 281 </test>
290 <test> 282 <test>
291 <param name="infile_model" value="gbc_model01" ftype="zip" /> 283 <param name="infile_model" value="gbc_model01" ftype="h5mlm" />
292 <param name="infile_data" value="test.tabular" ftype="tabular" /> 284 <param name="infile_data" value="test.tabular" ftype="tabular" />
293 <param name="selected_task" value="load" /> 285 <param name="selected_task" value="load" />
294 <output name="outfile_predict" file="gbc_result01" /> 286 <output name="outfile_predict" file="gbc_result01" />
295 </test> 287 </test>
296 <test> 288 <test>
302 <param name="selected_algorithm" value="AdaBoostClassifier" /> 294 <param name="selected_algorithm" value="AdaBoostClassifier" />
303 <param name="random_state" value="10" /> 295 <param name="random_state" value="10" />
304 <output name="outfile_fit" file="abc_model01" compare="sim_size" delta="5" /> 296 <output name="outfile_fit" file="abc_model01" compare="sim_size" delta="5" />
305 </test> 297 </test>
306 <test> 298 <test>
307 <param name="infile_model" value="abc_model01" ftype="zip" /> 299 <param name="infile_model" value="abc_model01" ftype="h5mlm" />
308 <param name="infile_data" value="test.tabular" ftype="tabular" /> 300 <param name="infile_data" value="test.tabular" ftype="tabular" />
309 <param name="selected_task" value="load" /> 301 <param name="selected_task" value="load" />
310 <output name="outfile_predict" file="abc_result01" /> 302 <output name="outfile_predict" file="abc_result01" />
311 </test> 303 </test>
312 <test> 304 <test>
318 <param name="selected_algorithm" value="AdaBoostRegressor" /> 310 <param name="selected_algorithm" value="AdaBoostRegressor" />
319 <param name="random_state" value="10" /> 311 <param name="random_state" value="10" />
320 <output name="outfile_fit" file="abr_model01" compare="sim_size" delta="5" /> 312 <output name="outfile_fit" file="abr_model01" compare="sim_size" delta="5" />
321 </test> 313 </test>
322 <test> 314 <test>
323 <param name="infile_model" value="abr_model01" ftype="zip" /> 315 <param name="infile_model" value="abr_model01" ftype="h5mlm" />
324 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> 316 <param name="infile_data" value="regression_test.tabular" ftype="tabular" />
325 <param name="selected_task" value="load" /> 317 <param name="selected_task" value="load" />
326 <output name="outfile_predict" file="abr_result01" /> 318 <output name="outfile_predict" file="abr_result01" />
327 </test> 319 </test>
328 </tests> 320 </tests>