comparison stacking_ensembles.py @ 26:9d3a024cf2da draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author bgruening
date Fri, 09 Aug 2019 07:07:13 -0400
parents b628de0d101f
children 63417d0acc72
comparison
equal deleted inserted replaced
25:bf32512b1418 26:9d3a024cf2da
1 import argparse 1 import argparse
2 import ast
2 import json 3 import json
4 import mlxtend.regressor
5 import mlxtend.classifier
3 import pandas as pd 6 import pandas as pd
4 import pickle 7 import pickle
5 import xgboost 8 import sklearn
9 import sys
6 import warnings 10 import warnings
7 from sklearn import (cluster, compose, decomposition, ensemble, 11 from sklearn import ensemble
8 feature_extraction, feature_selection,
9 gaussian_process, kernel_approximation, metrics,
10 model_selection, naive_bayes, neighbors,
11 pipeline, preprocessing, svm, linear_model,
12 tree, discriminant_analysis)
13 from sklearn.model_selection._split import check_cv
14 from feature_selectors import (DyRFE, DyRFECV,
15 MyPipeline, MyimbPipeline)
16 from iraps_classifier import (IRAPSCore, IRAPSClassifier,
17 BinarizeTargetClassifier,
18 BinarizeTargetRegressor)
19 from preprocessors import Z_RandomOverSampler
20 from utils import load_model, get_cv, get_estimator, get_search_params
21 12
22 from mlxtend.regressor import StackingCVRegressor, StackingRegressor 13 from galaxy_ml.utils import (load_model, get_cv, get_estimator,
23 from mlxtend.classifier import StackingCVClassifier, StackingClassifier 14 get_search_params)
24 15
25 16
26 warnings.filterwarnings('ignore') 17 warnings.filterwarnings('ignore')
27 18
28 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) 19 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
49 File path for params output 40 File path for params output
50 """ 41 """
51 with open(inputs_path, 'r') as param_handler: 42 with open(inputs_path, 'r') as param_handler:
52 params = json.load(param_handler) 43 params = json.load(param_handler)
53 44
45 estimator_type = params['algo_selection']['estimator_type']
46 # get base estimators
54 base_estimators = [] 47 base_estimators = []
55 for idx, base_file in enumerate(base_paths.split(',')): 48 for idx, base_file in enumerate(base_paths.split(',')):
56 if base_file and base_file != 'None': 49 if base_file and base_file != 'None':
57 with open(base_file, 'rb') as handler: 50 with open(base_file, 'rb') as handler:
58 model = load_model(handler) 51 model = load_model(handler)
59 else: 52 else:
60 estimator_json = (params['base_est_builder'][idx] 53 estimator_json = (params['base_est_builder'][idx]
61 ['estimator_selector']) 54 ['estimator_selector'])
62 model = get_estimator(estimator_json) 55 model = get_estimator(estimator_json)
63 base_estimators.append(model)
64 56
65 if meta_path: 57 if estimator_type.startswith('sklearn'):
66 with open(meta_path, 'rb') as f: 58 named = model.__class__.__name__.lower()
67 meta_estimator = load_model(f) 59 named = 'base_%d_%s' % (idx, named)
68 else: 60 base_estimators.append((named, model))
69 estimator_json = params['meta_estimator']['estimator_selector'] 61 else:
70 meta_estimator = get_estimator(estimator_json) 62 base_estimators.append(model)
63
64 # get meta estimator, if applicable
65 if estimator_type.startswith('mlxtend'):
66 if meta_path:
67 with open(meta_path, 'rb') as f:
68 meta_estimator = load_model(f)
69 else:
70 estimator_json = (params['algo_selection']
71 ['meta_estimator']['estimator_selector'])
72 meta_estimator = get_estimator(estimator_json)
71 73
72 options = params['algo_selection']['options'] 74 options = params['algo_selection']['options']
73 75
74 cv_selector = options.pop('cv_selector', None) 76 cv_selector = options.pop('cv_selector', None)
75 if cv_selector: 77 if cv_selector:
76 splitter, groups = get_cv(cv_selector) 78 splitter, groups = get_cv(cv_selector)
77 options['cv'] = splitter 79 options['cv'] = splitter
78 # set n_jobs 80 # set n_jobs
79 options['n_jobs'] = N_JOBS 81 options['n_jobs'] = N_JOBS
80 82
81 if params['algo_selection']['estimator_type'] == 'StackingCVClassifier': 83 weights = options.pop('weights', None)
82 ensemble_estimator = StackingCVClassifier( 84 if weights:
85 options['weights'] = ast.literal_eval(weights)
86
87 mod_and_name = estimator_type.split('_')
88 mod = sys.modules[mod_and_name[0]]
89 klass = getattr(mod, mod_and_name[1])
90
91 if estimator_type.startswith('sklearn'):
92 options['n_jobs'] = N_JOBS
93 ensemble_estimator = klass(base_estimators, **options)
94
95 elif mod == mlxtend.classifier:
96 ensemble_estimator = klass(
83 classifiers=base_estimators, 97 classifiers=base_estimators,
84 meta_classifier=meta_estimator, 98 meta_classifier=meta_estimator,
85 **options) 99 **options)
86 100
87 elif params['algo_selection']['estimator_type'] == 'StackingClassifier':
88 ensemble_estimator = StackingClassifier(
89 classifiers=base_estimators,
90 meta_classifier=meta_estimator,
91 **options)
92
93 elif params['algo_selection']['estimator_type'] == 'StackingCVRegressor':
94 ensemble_estimator = StackingCVRegressor(
95 regressors=base_estimators,
96 meta_regressor=meta_estimator,
97 **options)
98
99 else: 101 else:
100 ensemble_estimator = StackingRegressor( 102 ensemble_estimator = klass(
101 regressors=base_estimators, 103 regressors=base_estimators,
102 meta_regressor=meta_estimator, 104 meta_regressor=meta_estimator,
103 **options) 105 **options)
104 106
105 print(ensemble_estimator) 107 print(ensemble_estimator)