Mercurial > repos > bgruening > sklearn_model_validation
comparison stacking_ensembles.py @ 19:efbec977a47d draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author | bgruening |
---|---|
date | Fri, 09 Aug 2019 07:26:09 -0400 |
parents | cf9aa11b91c8 |
children | 887e0aaa482e |
comparison
equal
deleted
inserted
replaced
18:492d34a75de6 | 19:efbec977a47d |
---|---|
1 import argparse | 1 import argparse |
| | 2 import ast |
2 import json | 3 import json |
| | 4 import mlxtend.regressor |
| | 5 import mlxtend.classifier |
3 import pandas as pd | 6 import pandas as pd |
4 import pickle | 7 import pickle |
5 import xgboost | 8 import sklearn |
| | 9 import sys |
6 import warnings | 10 import warnings |
7 from sklearn import (cluster, compose, decomposition, ensemble, | 11 from sklearn import ensemble |
8 feature_extraction, feature_selection, | |
9 gaussian_process, kernel_approximation, metrics, | |
10 model_selection, naive_bayes, neighbors, | |
11 pipeline, preprocessing, svm, linear_model, | |
12 tree, discriminant_analysis) | |
13 from sklearn.model_selection._split import check_cv | |
14 from feature_selectors import (DyRFE, DyRFECV, | |
15 MyPipeline, MyimbPipeline) | |
16 from iraps_classifier import (IRAPSCore, IRAPSClassifier, | |
17 BinarizeTargetClassifier, | |
18 BinarizeTargetRegressor) | |
19 from preprocessors import Z_RandomOverSampler | |
20 from utils import load_model, get_cv, get_estimator, get_search_params | |
21 | 12 |
22 from mlxtend.regressor import StackingCVRegressor, StackingRegressor | 13 from galaxy_ml.utils import (load_model, get_cv, get_estimator, |
23 from mlxtend.classifier import StackingCVClassifier, StackingClassifier | 14 get_search_params) |
24 | 15 |
25 | 16 |
26 warnings.filterwarnings('ignore') | 17 warnings.filterwarnings('ignore') |
27 | 18 |
28 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) | 19 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) |
49 File path for params output | 40 File path for params output |
50 """ | 41 """ |
51 with open(inputs_path, 'r') as param_handler: | 42 with open(inputs_path, 'r') as param_handler: |
52 params = json.load(param_handler) | 43 params = json.load(param_handler) |
53 | 44 |
| | 45 estimator_type = params['algo_selection']['estimator_type'] |
| | 46 # get base estimators |
54 base_estimators = [] | 47 base_estimators = [] |
55 for idx, base_file in enumerate(base_paths.split(',')): | 48 for idx, base_file in enumerate(base_paths.split(',')): |
56 if base_file and base_file != 'None': | 49 if base_file and base_file != 'None': |
57 with open(base_file, 'rb') as handler: | 50 with open(base_file, 'rb') as handler: |
58 model = load_model(handler) | 51 model = load_model(handler) |
59 else: | 52 else: |
60 estimator_json = (params['base_est_builder'][idx] | 53 estimator_json = (params['base_est_builder'][idx] |
61 ['estimator_selector']) | 54 ['estimator_selector']) |
62 model = get_estimator(estimator_json) | 55 model = get_estimator(estimator_json) |
63 base_estimators.append(model) | |
64 | 56 |
65 if meta_path: | 57 if estimator_type.startswith('sklearn'): |
66 with open(meta_path, 'rb') as f: | 58 named = model.__class__.__name__.lower() |
67 meta_estimator = load_model(f) | 59 named = 'base_%d_%s' % (idx, named) |
68 else: | 60 base_estimators.append((named, model)) |
69 estimator_json = params['meta_estimator']['estimator_selector'] | 61 else: |
70 meta_estimator = get_estimator(estimator_json) | 62 base_estimators.append(model) |
| | 63 |
| | 64 # get meta estimator, if applicable |
| | 65 if estimator_type.startswith('mlxtend'): |
| | 66 if meta_path: |
| | 67 with open(meta_path, 'rb') as f: |
| | 68 meta_estimator = load_model(f) |
| | 69 else: |
| | 70 estimator_json = (params['algo_selection'] |
| | 71 ['meta_estimator']['estimator_selector']) |
| | 72 meta_estimator = get_estimator(estimator_json) |
71 | 73 |
72 options = params['algo_selection']['options'] | 74 options = params['algo_selection']['options'] |
73 | 75 |
74 cv_selector = options.pop('cv_selector', None) | 76 cv_selector = options.pop('cv_selector', None) |
75 if cv_selector: | 77 if cv_selector: |
76 splitter, groups = get_cv(cv_selector) | 78 splitter, groups = get_cv(cv_selector) |
77 options['cv'] = splitter | 79 options['cv'] = splitter |
78 # set n_jobs | 80 # set n_jobs |
79 options['n_jobs'] = N_JOBS | 81 options['n_jobs'] = N_JOBS |
80 | 82 |
81 if params['algo_selection']['estimator_type'] == 'StackingCVClassifier': | 83 weights = options.pop('weights', None) |
82 ensemble_estimator = StackingCVClassifier( | 84 if weights: |
| | 85 options['weights'] = ast.literal_eval(weights) |
| | 86 |
| | 87 mod_and_name = estimator_type.split('_') |
| | 88 mod = sys.modules[mod_and_name[0]] |
| | 89 klass = getattr(mod, mod_and_name[1]) |
| | 90 |
| | 91 if estimator_type.startswith('sklearn'): |
| | 92 options['n_jobs'] = N_JOBS |
| | 93 ensemble_estimator = klass(base_estimators, **options) |
| | 94 |
| | 95 elif mod == mlxtend.classifier: |
| | 96 ensemble_estimator = klass( |
83 classifiers=base_estimators, | 97 classifiers=base_estimators, |
84 meta_classifier=meta_estimator, | 98 meta_classifier=meta_estimator, |
85 **options) | 99 **options) |
86 | 100 |
87 elif params['algo_selection']['estimator_type'] == 'StackingClassifier': | |
88 ensemble_estimator = StackingClassifier( | |
89 classifiers=base_estimators, | |
90 meta_classifier=meta_estimator, | |
91 **options) | |
92 | |
93 elif params['algo_selection']['estimator_type'] == 'StackingCVRegressor': | |
94 ensemble_estimator = StackingCVRegressor( | |
95 regressors=base_estimators, | |
96 meta_regressor=meta_estimator, | |
97 **options) | |
98 | |
99 else: | 101 else: |
100 ensemble_estimator = StackingRegressor( | 102 ensemble_estimator = klass( |
101 regressors=base_estimators, | 103 regressors=base_estimators, |
102 meta_regressor=meta_estimator, | 104 meta_regressor=meta_estimator, |
103 **options) | 105 **options) |
104 | 106 |
105 print(ensemble_estimator) | 107 print(ensemble_estimator) |