Mercurial > repos > bgruening > sklearn_ensemble
comparison ensemble.xml @ 41:6546d7c9f08b draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author | bgruening |
---|---|
date | Wed, 09 Aug 2023 12:52:25 +0000 |
parents | 19d6c2745d34 |
children |
comparison
equal
deleted
inserted
replaced
40:a07ab242b0b5 | 41:6546d7c9f08b |
---|---|
1 <tool id="sklearn_ensemble" name="Ensemble methods" version="@VERSION@" profile="20.05"> | 1 <tool id="sklearn_ensemble" name="Ensemble methods" version="@VERSION@" profile="@PROFILE@"> |
2 <description>for classification and regression</description> | 2 <description>for classification and regression</description> |
3 <macros> | 3 <macros> |
4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="python_requirements" /> | 6 <expand macro="python_requirements" /> |
15 <configfile name="ensemble_script"> | 15 <configfile name="ensemble_script"> |
16 <![CDATA[ | 16 <![CDATA[ |
17 import json | 17 import json |
18 import numpy as np | 18 import numpy as np |
19 import pandas | 19 import pandas |
20 import pickle | |
21 import sys | 20 import sys |
22 | 21 |
23 from scipy.io import mmread | 22 from scipy.io import mmread |
24 import sklearn.ensemble | 23 import sklearn.ensemble |
25 from galaxy_ml.utils import load_model, get_X_y | 24 from galaxy_ml.model_persist import dump_model_to_h5, load_model_from_h5 |
25 from galaxy_ml.utils import clean_params, get_X_y | |
26 | 26 |
27 | 27 |
28 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) | 28 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) |
29 | 29 |
30 # Get inputs, outputs. | 30 # Get inputs, outputs. |
55 if options["select_max_features"]["max_features"] == "number_input": | 55 if options["select_max_features"]["max_features"] == "number_input": |
56 options["select_max_features"]["max_features"] = options["select_max_features"]["num_max_features"] | 56 options["select_max_features"]["max_features"] = options["select_max_features"]["num_max_features"] |
57 options["select_max_features"].pop("num_max_features") | 57 options["select_max_features"].pop("num_max_features") |
58 options["max_features"] = options["select_max_features"]["max_features"] | 58 options["max_features"] = options["select_max_features"]["max_features"] |
59 options.pop("select_max_features") | 59 options.pop("select_max_features") |
60 if "presort" in options: | |
61 if options["presort"] == "true": | |
62 options["presort"] = True | |
63 if options["presort"] == "false": | |
64 options["presort"] = False | |
65 if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0: | 60 if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0: |
66 options["min_samples_leaf"] = 1 | 61 options["min_samples_leaf"] = 1 |
67 if "min_samples_split" in options and options["min_samples_split"] > 1.0: | 62 if "min_samples_split" in options and options["min_samples_split"] > 1.0: |
68 options["min_samples_split"] = int(options["min_samples_split"]) | 63 options["min_samples_split"] = int(options["min_samples_split"]) |
69 | 64 |
70 X, y = get_X_y(params, infile1, infile2) | 65 X, y = get_X_y(params, infile1, infile2) |
71 | 66 |
72 my_class = getattr(sklearn.ensemble, algorithm) | 67 my_class = getattr(sklearn.ensemble, algorithm) |
73 estimator = my_class(**options) | 68 estimator = my_class(**options) |
74 estimator.fit(X,y) | 69 estimator.fit(X,y) |
75 with open(outfile_fit, 'wb') as out_handler: | 70 dump_model_to_h5(estimator, outfile_fit) |
76 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL) | |
77 | 71 |
78 else: | 72 else: |
79 with open(infile_model, 'rb') as model_handler: | 73 classifier_object = load_model_from_h5(infile_model) |
80 classifier_object = load_model(model_handler) | 74 classifier_object = clean_params(classifier_object) |
81 header = 'infer' if params["selected_tasks"]["header"] else None | 75 header = 'infer' if params["selected_tasks"]["header"] else None |
82 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None) | 76 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None) |
83 prediction = classifier_object.predict(data) | 77 prediction = classifier_object.predict(data) |
84 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) | 78 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) |
85 res = pandas.concat([data, prediction_df], axis=1) | 79 res = pandas.concat([data, prediction_df], axis=1) |
87 | 81 |
88 ]]> | 82 ]]> |
89 </configfile> | 83 </configfile> |
90 </configfiles> | 84 </configfiles> |
91 <inputs> | 85 <inputs> |
92 <expand macro="sl_Conditional" model="zip"> | 86 <expand macro="sl_Conditional" model="h5mlm"> |
93 <param name="selected_algorithm" type="select" label="Select an ensemble method:"> | 87 <param name="selected_algorithm" type="select" label="Select an ensemble method:"> |
94 <option value="RandomForestClassifier" selected="true">Random forest classifier</option> | 88 <option value="RandomForestClassifier" selected="true">Random forest classifier</option> |
95 <option value="AdaBoostClassifier">Ada boost classifier</option> | 89 <option value="AdaBoostClassifier">Ada boost classifier</option> |
96 <option value="GradientBoostingClassifier">Gradient Boosting Classifier</option> | 90 <option value="GradientBoostingClassifier">Gradient Boosting Classifier</option> |
97 <option value="RandomForestRegressor">Random forest regressor</option> | 91 <option value="RandomForestRegressor">Random forest regressor</option> |
151 <expand macro="max_leaf_nodes" /> | 145 <expand macro="max_leaf_nodes" /> |
152 <expand macro="min_impurity_decrease" /> | 146 <expand macro="min_impurity_decrease" /> |
153 <expand macro="verbose" /> | 147 <expand macro="verbose" /> |
154 <expand macro="warm_start" checked="false" /> | 148 <expand macro="warm_start" checked="false" /> |
155 <expand macro="random_state" /> | 149 <expand macro="random_state" /> |
156 <expand macro="presort" /> | |
157 </section> | 150 </section> |
158 </when> | 151 </when> |
159 <when value="RandomForestRegressor"> | 152 <when value="RandomForestRegressor"> |
160 <expand macro="sl_mixed_input" /> | 153 <expand macro="sl_mixed_input" /> |
161 <section name="options" title="Advanced Options" expanded="False"> | 154 <section name="options" title="Advanced Options" expanded="False"> |
214 <param argument="alpha" type="float" value="0.9" label="alpha" help="The alpha-quantile of the huber loss function and the quantile loss function" /> | 207 <param argument="alpha" type="float" value="0.9" label="alpha" help="The alpha-quantile of the huber loss function and the quantile loss function" /> |
215 <!--base_estimator=None--> | 208 <!--base_estimator=None--> |
216 <expand macro="verbose" /> | 209 <expand macro="verbose" /> |
217 <expand macro="warm_start" checked="false" /> | 210 <expand macro="warm_start" checked="false" /> |
218 <expand macro="random_state" /> | 211 <expand macro="random_state" /> |
219 <expand macro="presort" /> | |
220 </section> | 212 </section> |
221 </when> | 213 </when> |
222 </expand> | 214 </expand> |
223 </inputs> | 215 </inputs> |
224 | 216 |
234 <param name="selected_algorithm" value="RandomForestClassifier" /> | 226 <param name="selected_algorithm" value="RandomForestClassifier" /> |
235 <param name="random_state" value="10" /> | 227 <param name="random_state" value="10" /> |
236 <output name="outfile_fit" file="rfc_model01" compare="sim_size" delta="5" /> | 228 <output name="outfile_fit" file="rfc_model01" compare="sim_size" delta="5" /> |
237 </test> | 229 </test> |
238 <test> | 230 <test> |
239 <param name="infile_model" value="rfc_model01" ftype="zip" /> | 231 <param name="infile_model" value="rfc_model01" ftype="h5mlm" /> |
240 <param name="infile_data" value="test.tabular" ftype="tabular" /> | 232 <param name="infile_data" value="test.tabular" ftype="tabular" /> |
241 <param name="selected_task" value="load" /> | 233 <param name="selected_task" value="load" /> |
242 <output name="outfile_predict" file="rfc_result01" /> | 234 <output name="outfile_predict" file="rfc_result01" /> |
243 </test> | 235 </test> |
244 <test> | 236 <test> |
250 <param name="selected_algorithm" value="RandomForestRegressor" /> | 242 <param name="selected_algorithm" value="RandomForestRegressor" /> |
251 <param name="random_state" value="10" /> | 243 <param name="random_state" value="10" /> |
252 <output name="outfile_fit" file="rfr_model01" compare="sim_size" delta="5" /> | 244 <output name="outfile_fit" file="rfr_model01" compare="sim_size" delta="5" /> |
253 </test> | 245 </test> |
254 <test> | 246 <test> |
255 <param name="infile_model" value="rfr_model01" ftype="zip" /> | 247 <param name="infile_model" value="rfr_model01" ftype="h5mlm" /> |
256 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> | 248 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> |
257 <param name="selected_task" value="load" /> | 249 <param name="selected_task" value="load" /> |
258 <output name="outfile_predict" file="rfr_result01" /> | 250 <output name="outfile_predict" file="rfr_result01" /> |
259 </test> | 251 </test> |
260 <test> | 252 <test> |
270 <param name="num_max_features" value="0.5" /> | 262 <param name="num_max_features" value="0.5" /> |
271 <param name="random_state" value="42" /> | 263 <param name="random_state" value="42" /> |
272 <output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="5" /> | 264 <output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="5" /> |
273 </test> | 265 </test> |
274 <test> | 266 <test> |
275 <param name="infile_model" value="gbr_model01" ftype="zip" /> | 267 <param name="infile_model" value="gbr_model01" ftype="h5mlm" /> |
276 <param name="infile_data" value="regression_test_X.tabular" ftype="tabular" /> | 268 <param name="infile_data" value="regression_test_X.tabular" ftype="tabular" /> |
277 <param name="selected_task" value="load" /> | 269 <param name="selected_task" value="load" /> |
278 <param name="header" value="True" /> | 270 <param name="header" value="True" /> |
279 <output name="outfile_predict" file="gbr_prediction_result01.tabular" /> | 271 <output name="outfile_predict" file="gbr_prediction_result01.tabular" /> |
280 </test> | 272 </test> |
286 <param name="selected_task" value="train" /> | 278 <param name="selected_task" value="train" /> |
287 <param name="selected_algorithm" value="GradientBoostingClassifier" /> | 279 <param name="selected_algorithm" value="GradientBoostingClassifier" /> |
288 <output name="outfile_fit" file="gbc_model01" compare="sim_size" delta="5" /> | 280 <output name="outfile_fit" file="gbc_model01" compare="sim_size" delta="5" /> |
289 </test> | 281 </test> |
290 <test> | 282 <test> |
291 <param name="infile_model" value="gbc_model01" ftype="zip" /> | 283 <param name="infile_model" value="gbc_model01" ftype="h5mlm" /> |
292 <param name="infile_data" value="test.tabular" ftype="tabular" /> | 284 <param name="infile_data" value="test.tabular" ftype="tabular" /> |
293 <param name="selected_task" value="load" /> | 285 <param name="selected_task" value="load" /> |
294 <output name="outfile_predict" file="gbc_result01" /> | 286 <output name="outfile_predict" file="gbc_result01" /> |
295 </test> | 287 </test> |
296 <test> | 288 <test> |
302 <param name="selected_algorithm" value="AdaBoostClassifier" /> | 294 <param name="selected_algorithm" value="AdaBoostClassifier" /> |
303 <param name="random_state" value="10" /> | 295 <param name="random_state" value="10" /> |
304 <output name="outfile_fit" file="abc_model01" compare="sim_size" delta="5" /> | 296 <output name="outfile_fit" file="abc_model01" compare="sim_size" delta="5" /> |
305 </test> | 297 </test> |
306 <test> | 298 <test> |
307 <param name="infile_model" value="abc_model01" ftype="zip" /> | 299 <param name="infile_model" value="abc_model01" ftype="h5mlm" /> |
308 <param name="infile_data" value="test.tabular" ftype="tabular" /> | 300 <param name="infile_data" value="test.tabular" ftype="tabular" /> |
309 <param name="selected_task" value="load" /> | 301 <param name="selected_task" value="load" /> |
310 <output name="outfile_predict" file="abc_result01" /> | 302 <output name="outfile_predict" file="abc_result01" /> |
311 </test> | 303 </test> |
312 <test> | 304 <test> |
318 <param name="selected_algorithm" value="AdaBoostRegressor" /> | 310 <param name="selected_algorithm" value="AdaBoostRegressor" /> |
319 <param name="random_state" value="10" /> | 311 <param name="random_state" value="10" /> |
320 <output name="outfile_fit" file="abr_model01" compare="sim_size" delta="5" /> | 312 <output name="outfile_fit" file="abr_model01" compare="sim_size" delta="5" /> |
321 </test> | 313 </test> |
322 <test> | 314 <test> |
323 <param name="infile_model" value="abr_model01" ftype="zip" /> | 315 <param name="infile_model" value="abr_model01" ftype="h5mlm" /> |
324 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> | 316 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> |
325 <param name="selected_task" value="load" /> | 317 <param name="selected_task" value="load" /> |
326 <output name="outfile_predict" file="abr_result01" /> | 318 <output name="outfile_predict" file="abr_result01" /> |
327 </test> | 319 </test> |
328 </tests> | 320 </tests> |