Mercurial > repos > bgruening > sklearn_stacking_ensemble_models
comparison stacking_ensembles.py @ 2:22560cf810b8 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
| author | bgruening | 
|---|---|
| date | Fri, 09 Aug 2019 08:08:15 -0400 | 
| parents | fcc5eaaec401 | 
| children | ae4b593c99f8 | 
   comparison
  legend: equal, deleted, inserted, replaced
| 1:6717e5cc4d05 | 2:22560cf810b8 | 
|---|---|
| 1 import argparse | 1 import argparse | 
| | 2 import ast |
| 2 import json | 3 import json | 
| | 4 import mlxtend.regressor |
| | 5 import mlxtend.classifier |
| 3 import pandas as pd | 6 import pandas as pd | 
| 4 import pickle | 7 import pickle | 
| 5 import xgboost | 8 import sklearn | 
| | 9 import sys |
| 6 import warnings | 10 import warnings | 
| 7 from sklearn import (cluster, compose, decomposition, ensemble, | 11 from sklearn import ensemble | 
| 8 feature_extraction, feature_selection, | |
| 9 gaussian_process, kernel_approximation, metrics, | |
| 10 model_selection, naive_bayes, neighbors, | |
| 11 pipeline, preprocessing, svm, linear_model, | |
| 12 tree, discriminant_analysis) | |
| 13 from sklearn.model_selection._split import check_cv | |
| 14 from feature_selectors import (DyRFE, DyRFECV, | |
| 15 MyPipeline, MyimbPipeline) | |
| 16 from iraps_classifier import (IRAPSCore, IRAPSClassifier, | |
| 17 BinarizeTargetClassifier, | |
| 18 BinarizeTargetRegressor) | |
| 19 from preprocessors import Z_RandomOverSampler | |
| 20 from utils import load_model, get_cv, get_estimator, get_search_params | |
| 21 | 12 | 
| 22 from mlxtend.regressor import StackingCVRegressor, StackingRegressor | 13 from galaxy_ml.utils import (load_model, get_cv, get_estimator, | 
| 23 from mlxtend.classifier import StackingCVClassifier, StackingClassifier | 14 get_search_params) | 
| 24 | 15 | 
| 25 | 16 | 
| 26 warnings.filterwarnings('ignore') | 17 warnings.filterwarnings('ignore') | 
| 27 | 18 | 
| 28 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) | 19 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) | 
| 49 File path for params output | 40 File path for params output | 
| 50 """ | 41 """ | 
| 51 with open(inputs_path, 'r') as param_handler: | 42 with open(inputs_path, 'r') as param_handler: | 
| 52 params = json.load(param_handler) | 43 params = json.load(param_handler) | 
| 53 | 44 | 
| | 45 estimator_type = params['algo_selection']['estimator_type'] |
| | 46 # get base estimators |
| 54 base_estimators = [] | 47 base_estimators = [] | 
| 55 for idx, base_file in enumerate(base_paths.split(',')): | 48 for idx, base_file in enumerate(base_paths.split(',')): | 
| 56 if base_file and base_file != 'None': | 49 if base_file and base_file != 'None': | 
| 57 with open(base_file, 'rb') as handler: | 50 with open(base_file, 'rb') as handler: | 
| 58 model = load_model(handler) | 51 model = load_model(handler) | 
| 59 else: | 52 else: | 
| 60 estimator_json = (params['base_est_builder'][idx] | 53 estimator_json = (params['base_est_builder'][idx] | 
| 61 ['estimator_selector']) | 54 ['estimator_selector']) | 
| 62 model = get_estimator(estimator_json) | 55 model = get_estimator(estimator_json) | 
| 63 base_estimators.append(model) | |
| 64 | 56 | 
| 65 if meta_path: | 57 if estimator_type.startswith('sklearn'): | 
| 66 with open(meta_path, 'rb') as f: | 58 named = model.__class__.__name__.lower() | 
| 67 meta_estimator = load_model(f) | 59 named = 'base_%d_%s' % (idx, named) | 
| 68 else: | 60 base_estimators.append((named, model)) | 
| 69 estimator_json = params['meta_estimator']['estimator_selector'] | 61 else: | 
| 70 meta_estimator = get_estimator(estimator_json) | 62 base_estimators.append(model) | 
| | 63 |
| | 64 # get meta estimator, if applicable |
| | 65 if estimator_type.startswith('mlxtend'): |
| | 66 if meta_path: |
| | 67 with open(meta_path, 'rb') as f: |
| | 68 meta_estimator = load_model(f) |
| | 69 else: |
| | 70 estimator_json = (params['algo_selection'] |
| | 71 ['meta_estimator']['estimator_selector']) |
| | 72 meta_estimator = get_estimator(estimator_json) |
| 71 | 73 | 
| 72 options = params['algo_selection']['options'] | 74 options = params['algo_selection']['options'] | 
| 73 | 75 | 
| 74 cv_selector = options.pop('cv_selector', None) | 76 cv_selector = options.pop('cv_selector', None) | 
| 75 if cv_selector: | 77 if cv_selector: | 
| 76 splitter, groups = get_cv(cv_selector) | 78 splitter, groups = get_cv(cv_selector) | 
| 77 options['cv'] = splitter | 79 options['cv'] = splitter | 
| 78 # set n_jobs | 80 # set n_jobs | 
| 79 options['n_jobs'] = N_JOBS | 81 options['n_jobs'] = N_JOBS | 
| 80 | 82 | 
| 81 if params['algo_selection']['estimator_type'] == 'StackingCVClassifier': | 83 weights = options.pop('weights', None) | 
| 82 ensemble_estimator = StackingCVClassifier( | 84 if weights: | 
| | 85 options['weights'] = ast.literal_eval(weights) |
| | 86 |
| | 87 mod_and_name = estimator_type.split('_') |
| | 88 mod = sys.modules[mod_and_name[0]] |
| | 89 klass = getattr(mod, mod_and_name[1]) |
| | 90 |
| | 91 if estimator_type.startswith('sklearn'): |
| | 92 options['n_jobs'] = N_JOBS |
| | 93 ensemble_estimator = klass(base_estimators, **options) |
| | 94 |
| | 95 elif mod == mlxtend.classifier: |
| | 96 ensemble_estimator = klass( |
| 83 classifiers=base_estimators, | 97 classifiers=base_estimators, | 
| 84 meta_classifier=meta_estimator, | 98 meta_classifier=meta_estimator, | 
| 85 **options) | 99 **options) | 
| 86 | 100 | 
| 87 elif params['algo_selection']['estimator_type'] == 'StackingClassifier': | |
| 88 ensemble_estimator = StackingClassifier( | |
| 89 classifiers=base_estimators, | |
| 90 meta_classifier=meta_estimator, | |
| 91 **options) | |
| 92 | |
| 93 elif params['algo_selection']['estimator_type'] == 'StackingCVRegressor': | |
| 94 ensemble_estimator = StackingCVRegressor( | |
| 95 regressors=base_estimators, | |
| 96 meta_regressor=meta_estimator, | |
| 97 **options) | |
| 98 | |
| 99 else: | 101 else: | 
| 100 ensemble_estimator = StackingRegressor( | 102 ensemble_estimator = klass( | 
| 101 regressors=base_estimators, | 103 regressors=base_estimators, | 
| 102 meta_regressor=meta_estimator, | 104 meta_regressor=meta_estimator, | 
| 103 **options) | 105 **options) | 
| 104 | 106 | 
| 105 print(ensemble_estimator) | 107 print(ensemble_estimator) | 
