Mercurial > repos > bgruening > sklearn_data_preprocess
annotate search_model_validation.py @ 23:d6b8103c909c draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
author | bgruening |
---|---|
date | Sun, 30 Dec 2018 01:58:00 -0500 |
parents | |
children | 9e43ee712723 |
rev | line source |
---|---|
23
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
1 import imblearn |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
2 import json |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
3 import numpy as np |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
4 import os |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
5 import pandas |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
6 import pickle |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
7 import skrebate |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
8 import sklearn |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
9 import sys |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
10 import xgboost |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
11 import warnings |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
12 from imblearn import under_sampling, over_sampling, combine |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
13 from imblearn.pipeline import Pipeline as imbPipeline |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
14 from sklearn import (cluster, compose, decomposition, ensemble, feature_extraction, |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
15 feature_selection, gaussian_process, kernel_approximation, metrics, |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
16 model_selection, naive_bayes, neighbors, pipeline, preprocessing, |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
17 svm, linear_model, tree, discriminant_analysis) |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
18 from sklearn.exceptions import FitFailedWarning |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
19 from sklearn.externals import joblib |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
20 from utils import get_cv, get_scoring, get_X_y, load_model, read_columns, SafeEval |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
21 |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
22 |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
# Number of parallel workers, read from the GALAXY_SLOTS environment
# variable; falls back to 1 when the variable is not set.
N_JOBS = int(os.getenv('GALAXY_SLOTS', 1))
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
24 |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
25 |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
def _default_preprocessors():
    """Return a freshly instantiated list of the built-in preprocessors.

    The position of every entry is significant: `get_search_params` selects
    entries by integer index and by the group keywords, which map to fixed
    slices of this list. New instances are created on every call so that
    different search-parameter entries never share estimator objects.
    """
    return [
        # 0-7: sklearn.preprocessing (KernelCenterer is not included)
        preprocessing.StandardScaler(), preprocessing.Binarizer(), preprocessing.Imputer(),
        preprocessing.MaxAbsScaler(), preprocessing.Normalizer(), preprocessing.MinMaxScaler(),
        preprocessing.PolynomialFeatures(), preprocessing.RobustScaler(),
        # 8-14: sklearn.feature_selection
        feature_selection.SelectKBest(), feature_selection.GenericUnivariateSelect(),
        feature_selection.SelectPercentile(), feature_selection.SelectFpr(), feature_selection.SelectFdr(),
        feature_selection.SelectFwe(), feature_selection.VarianceThreshold(),
        # 15-25: sklearn.decomposition
        decomposition.FactorAnalysis(random_state=0), decomposition.FastICA(random_state=0),
        decomposition.IncrementalPCA(),
        decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
        decomposition.LatentDirichletAllocation(random_state=0, n_jobs=N_JOBS),
        decomposition.MiniBatchDictionaryLearning(random_state=0, n_jobs=N_JOBS),
        decomposition.MiniBatchSparsePCA(random_state=0, n_jobs=N_JOBS), decomposition.NMF(random_state=0),
        decomposition.PCA(random_state=0), decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
        decomposition.TruncatedSVD(random_state=0),
        # 26-29: sklearn.kernel_approximation
        kernel_approximation.Nystroem(random_state=0), kernel_approximation.RBFSampler(random_state=0),
        kernel_approximation.AdditiveChi2Sampler(), kernel_approximation.SkewedChi2Sampler(random_state=0),
        # 30: sklearn.cluster
        cluster.FeatureAgglomeration(),
        # 31-35: skrebate
        skrebate.ReliefF(n_jobs=N_JOBS), skrebate.SURF(n_jobs=N_JOBS), skrebate.SURFstar(n_jobs=N_JOBS),
        skrebate.MultiSURF(n_jobs=N_JOBS), skrebate.MultiSURFstar(n_jobs=N_JOBS),
        # 36-54: imblearn samplers
        imblearn.under_sampling.ClusterCentroids(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.CondensedNearestNeighbour(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.EditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.RepeatedEditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.InstanceHardnessThreshold(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.NearMiss(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.NeighbourhoodCleaningRule(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.OneSidedSelection(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.RandomUnderSampler(random_state=0),
        imblearn.under_sampling.TomekLinks(random_state=0, n_jobs=N_JOBS),
        imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
        imblearn.over_sampling.RandomOverSampler(random_state=0),
        imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
        imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
        imblearn.over_sampling.BorderlineSMOTE(random_state=0, n_jobs=N_JOBS),
        imblearn.over_sampling.SMOTENC(categorical_features=[], random_state=0, n_jobs=N_JOBS),
        imblearn.combine.SMOTEENN(random_state=0), imblearn.combine.SMOTETomek(random_state=0),
    ]


def _search_key_prefix(param_type):
    """Return the search-parameter key prefix for the given parameter type.

    'final_estimator_p' maps to 'estimator'; any other type maps to
    'preprocessing_' followed by the single character at index 5 of
    `param_type` (presumably the step number of a 'prep_N_p' style
    identifier -- TODO confirm against the tool's parameter definitions).
    """
    if param_type == 'final_estimator_p':
        return 'estimator'
    return 'preprocessing_' + param_type[5:6]


def get_search_params(params_builder):
    """Build the search-parameter dictionary from the tool's parameter input.

    Parameters
    ----------
    params_builder : dict
        Must contain a 'param_set' list. Each entry holds a
        'search_param_selector' dict with the keys 'search_p' (a string of
        the form ``name: literal``) and 'selected_param_type'.

    Returns
    -------
    dict
        Maps ``estimator__<name>`` or ``preprocessing_<c>__<name>`` to the
        evaluated literal. When the name part is empty (only allowed for
        non-final-estimator types), ``preprocessing_<c>`` is mapped to a list
        of preprocessor objects instead.

    Raises
    ------
    AssertionError
        If 'search_p' does not contain exactly one colon.
    SystemExit
        If the parameter name is ``n_jobs``, if the name is empty for the
        final estimator, or if an unnamed entry contains an unsupported
        preprocessor value.

    Notes
    -----
    Entries whose 'search_p' is blank are skipped. A name ending with ``-``
    marks an estimator-valued parameter: its literal is evaluated with the
    ``SafeEval(load_estimators=True)`` instance and the trailing dash is
    dropped from the resulting key.
    """
    search_params = {}
    safe_eval = SafeEval(load_scipy=True, load_numpy=True)
    safe_eval_es = SafeEval(load_estimators=True)

    for p in params_builder['param_set']:
        selector = p['search_param_selector']
        search_p = selector['search_p']
        if search_p.strip() == '':
            continue
        param_type = selector['selected_param_type']

        lst = search_p.split(':')
        # Raised explicitly instead of via `assert` so the check is not
        # stripped under `python -O`; the exception type is unchanged.
        if len(lst) != 2:
            raise AssertionError("Error, make sure there is one and only one colon in search parameter input.")
        literal = lst[1].strip()
        param_name = lst[0].strip()

        if param_name:
            if param_name.lower() == 'n_jobs':
                sys.exit("Parameter `%s` is invalid for search." % param_name)
            elif not param_name.endswith('-'):
                ev = safe_eval(literal)
                search_params[_search_key_prefix(param_type) + '__' + param_name] = ev
            else:
                # only for estimator eval, add `-` to the end of param
                # TODO maybe add regular expression check
                ev = safe_eval_es(literal)
                for obj in ev:
                    # Candidates that are not estimators (e.g. None) are
                    # kept as they are instead of raising AttributeError.
                    if hasattr(obj, 'get_params') and 'n_jobs' in obj.get_params():
                        obj.set_params(n_jobs=N_JOBS)
                search_params[_search_key_prefix(param_type) + '__' + param_name[:-1]] = ev
        elif param_type != 'final_estimator_p':
            # No parameter name: the literal lists candidates for the whole
            # preprocessing step.
            # TODO regular expression check ?
            ev = safe_eval_es(literal)
            # Built fresh for every entry so that instances are not shared
            # between different search parameters.
            preprocessors = _default_preprocessors()
            newlist = []
            for obj in ev:
                if obj is None:
                    newlist.append(None)
                elif obj == 'all_0':
                    newlist.extend(preprocessors[0:36])
                elif obj == 'sk_prep_all':      # no KernelCenterer()
                    newlist.extend(preprocessors[0:8])
                elif obj == 'fs_all':
                    newlist.extend(preprocessors[8:15])
                elif obj == 'decomp_all':
                    newlist.extend(preprocessors[15:26])
                elif obj == 'k_appr_all':
                    newlist.extend(preprocessors[26:30])
                elif obj == 'reb_all':
                    newlist.extend(preprocessors[31:36])
                elif obj == 'imb_all':
                    newlist.extend(preprocessors[36:55])
                elif type(obj) is int and -1 < obj < len(preprocessors):
                    newlist.append(preprocessors[obj])
                elif hasattr(obj, 'get_params'):       # user object
                    if 'n_jobs' in obj.get_params():
                        newlist.append(obj.set_params(n_jobs=N_JOBS))
                    else:
                        newlist.append(obj)
                else:
                    # One-element tuple so that a tuple-valued `obj` is
                    # formatted as a whole rather than unpacked.
                    sys.exit("Unsupported preprocessor type: %r" % (obj,))
            search_params[_search_key_prefix(param_type)] = newlist
        else:
            sys.exit("Parameter name of the final estimator can't be skipped!")

    return search_params
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
131 |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
132 |
d6b8103c909c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line driver: build a hyperparameter search (the class is
    # looked up by name in `model_selection`) around a previously saved
    # pipeline, fit it on the supplied data and write the cross-validation
    # results as a tab-separated table.
    #
    # Positional arguments:
    #   1  JSON file with the tool parameters
    #   2  saved pipeline/estimator to tune
    #   3  feature data (tabular, otherwise read with `mmread`)
    #   4  target data (tabular)
    #   5  output path for the results table
    #   6  (optional) output path for the pickled best estimator

    warnings.simplefilter('ignore')

    input_json_path = sys.argv[1]
    with open(input_json_path, 'r') as param_handler:
        params = json.load(param_handler)

    infile_pipeline = sys.argv[2]
    infile1 = sys.argv[3]
    infile2 = sys.argv[4]
    outfile_result = sys.argv[5]
    outfile_estimator = sys.argv[6] if len(sys.argv) > 6 else None

    params_builder = params['search_schemes']['search_params_builder']

    # Selector modes that come with an explicit column list; any other
    # mode passes c=None to read_columns.
    explicit_column_options = (
        'by_index_number',
        'all_but_by_index_number',
        'by_header_name',
        'all_but_by_header_name',
    )

    # ---- features -------------------------------------------------------
    input_type = params['input_options']['selected_input']
    if input_type == 'tabular':
        header = 'infer' if params['input_options']['header1'] else None
        column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option']
        if column_option in explicit_column_options:
            c = params['input_options']['column_selector_options_1']['col1']
        else:
            c = None
        X = read_columns(
            infile1,
            c=c,
            c_option=column_option,
            sep='\t',
            header=header,
            parse_dates=True
        )
    else:
        # Close the handle once the matrix has been read instead of
        # leaving it open for the lifetime of the process.
        with open(infile1, 'r') as matrix_handler:
            X = mmread(matrix_handler)

    # ---- targets --------------------------------------------------------
    header = 'infer' if params['input_options']['header2'] else None
    column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2']
    if column_option in explicit_column_options:
        c = params['input_options']['column_selector_options_2']['col2']
    else:
        c = None
    y = read_columns(
        infile2,
        c=c,
        c_option=column_option,
        sep='\t',
        header=header,
        parse_dates=True
    )
    y = y.ravel()

    # ---- search configuration -------------------------------------------
    optimizer = params['search_schemes']['selected_search_scheme']
    optimizer = getattr(model_selection, optimizer)

    options = params['search_schemes']['options']
    splitter, groups = get_cv(options.pop('cv_selector'))
    if groups is None:
        # No group information at all: hand the splitter over untouched.
        options['cv'] = splitter
    elif groups == '':
        # Empty group selection: materialise the splits without groups.
        options['cv'] = list(splitter.split(X, y, groups=None))
    else:
        options['cv'] = list(splitter.split(X, y, groups=groups))
    options['n_jobs'] = N_JOBS
    # Remember the scorer name before options['scoring'] is replaced; it
    # is used below to relabel the result columns.
    primary_scoring = options['scoring']['primary_scoring']
    options['scoring'] = get_scoring(options['scoring'])
    if options['error_score']:
        options['error_score'] = 'raise'
    else:
        # np.nan rather than np.NaN: the latter alias no longer exists in
        # NumPy 2.0, the value is the same.
        options['error_score'] = np.nan
    if options['refit'] and isinstance(options['scoring'], dict):
        # With a dict of scorers, refit must name one of its keys.
        # NOTE(review): assumes get_scoring stores the main scorer under
        # 'primary' (matches the '*_test_primary' columns renamed below).
        options['refit'] = 'primary'
    if 'pre_dispatch' in options and options['pre_dispatch'] == '':
        options['pre_dispatch'] = None

    with open(infile_pipeline, 'rb') as pipeline_handler:
        pipeline = load_model(pipeline_handler)

    search_params = get_search_params(params_builder)
    searcher = optimizer(pipeline, search_params, **options)

    # ---- fit ------------------------------------------------------------
    fit_error = None
    if options['error_score'] == 'raise':
        searcher.fit(X, y)
    else:
        # Best-effort mode: record fit failures as warnings and echo them
        # rather than aborting on the first one.
        warnings.simplefilter('always', FitFailedWarning)
        with warnings.catch_warnings(record=True) as w:
            try:
                searcher.fit(X, y)
            except ValueError as err:
                # Keep the error so it can be reported below instead of
                # being discarded silently.
                fit_error = err
            for warning in w:
                print(repr(warning.message))

    if fit_error is not None:
        print(repr(fit_error))
        if not hasattr(searcher, 'cv_results_'):
            # Nothing to report: stop with the real cause rather than
            # failing later with an unrelated AttributeError.
            sys.exit("Model search failed without producing results: %r" % (fit_error,))

    # ---- outputs --------------------------------------------------------
    cv_result = pandas.DataFrame(searcher.cv_results_)
    cv_result.rename(
        inplace=True,
        columns={
            'mean_test_primary': 'mean_test_' + primary_scoring,
            'rank_test_primary': 'rank_test_' + primary_scoring,
        }
    )
    cv_result.to_csv(path_or_buf=outfile_result, sep='\t', header=True, index=False)

    if outfile_estimator:
        with open(outfile_estimator, 'wb') as output_handler:
            pickle.dump(searcher.best_estimator_, output_handler, pickle.HIGHEST_PROTOCOL)