Mercurial > repos > bgruening > sklearn_searchcv
annotate search_model_validation.py @ 7:4368259ff821 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
| author | bgruening | 
|---|---|
| date | Sun, 30 Dec 2018 01:51:27 -0500 | 
| parents | |
| children | 1c4a241bef5c | 
| rev | line source | 
|---|---|
| 
7
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
1 import imblearn | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
2 import json | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
3 import numpy as np | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
4 import os | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
5 import pandas | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
6 import pickle | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
7 import skrebate | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
8 import sklearn | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
9 import sys | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
10 import xgboost | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
11 import warnings | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
12 from imblearn import under_sampling, over_sampling, combine | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
13 from imblearn.pipeline import Pipeline as imbPipeline | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
14 from sklearn import (cluster, compose, decomposition, ensemble, feature_extraction, | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
15 feature_selection, gaussian_process, kernel_approximation, metrics, | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
16 model_selection, naive_bayes, neighbors, pipeline, preprocessing, | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
17 svm, linear_model, tree, discriminant_analysis) | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
18 from sklearn.exceptions import FitFailedWarning | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
19 from sklearn.externals import joblib | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
20 from utils import get_cv, get_scoring, get_X_y, load_model, read_columns, SafeEval | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
21 | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
22 | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
# Degree of parallelism passed as `n_jobs` to estimators that accept it.
# Read from the GALAXY_SLOTS environment variable; falls back to 1 when unset.
N_JOBS = int(os.getenv('GALAXY_SLOTS', 1))
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
24 | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
25 | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
def _search_param_key(param_type, param_name=None):
    """Build the key under which a search entry is stored.

    Parameters
    ----------
    param_type : str
        Value of `selected_param_type`. The literal 'final_estimator_p'
        selects the `estimator` prefix; for anything else the single
        character at index 5 is appended to 'preprocessing_'
        (presumably the step number, e.g. 'prep_1_p' -> 'preprocessing_1';
        verify against the tool's XML).
    param_name : str or None
        Parameter name to append after a double underscore. When None, the
        bare step prefix is returned (used to search over the step itself).
    """
    if param_type == 'final_estimator_p':
        prefix = 'estimator'
    else:
        prefix = 'preprocessing_' + param_type[5:6]
    if param_name is None:
        return prefix
    return prefix + '__' + param_name


def _builtin_preprocessors():
    """Return fresh instances of every built-in preprocessor.

    The position of each instance matters: integer inputs index into this
    list and the named shortcuts in `_PREPROCESSOR_GROUPS` slice it, so new
    entries must not be inserted without updating those ranges.
    """
    return [
        # 0-7: scalers and other sklearn.preprocessing transformers
        preprocessing.StandardScaler(), preprocessing.Binarizer(), preprocessing.Imputer(),
        preprocessing.MaxAbsScaler(), preprocessing.Normalizer(), preprocessing.MinMaxScaler(),
        preprocessing.PolynomialFeatures(), preprocessing.RobustScaler(),
        # 8-14: feature selection
        feature_selection.SelectKBest(), feature_selection.GenericUnivariateSelect(),
        feature_selection.SelectPercentile(), feature_selection.SelectFpr(), feature_selection.SelectFdr(),
        feature_selection.SelectFwe(), feature_selection.VarianceThreshold(),
        # 15-25: decomposition
        decomposition.FactorAnalysis(random_state=0), decomposition.FastICA(random_state=0),
        decomposition.IncrementalPCA(),
        decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
        decomposition.LatentDirichletAllocation(random_state=0, n_jobs=N_JOBS),
        decomposition.MiniBatchDictionaryLearning(random_state=0, n_jobs=N_JOBS),
        decomposition.MiniBatchSparsePCA(random_state=0, n_jobs=N_JOBS), decomposition.NMF(random_state=0),
        decomposition.PCA(random_state=0), decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
        decomposition.TruncatedSVD(random_state=0),
        # 26-29: kernel approximation
        kernel_approximation.Nystroem(random_state=0), kernel_approximation.RBFSampler(random_state=0),
        kernel_approximation.AdditiveChi2Sampler(), kernel_approximation.SkewedChi2Sampler(random_state=0),
        # 30: feature agglomeration (only reachable by index or via 'all_0')
        cluster.FeatureAgglomeration(),
        # 31-35: skrebate
        skrebate.ReliefF(n_jobs=N_JOBS), skrebate.SURF(n_jobs=N_JOBS), skrebate.SURFstar(n_jobs=N_JOBS),
        skrebate.MultiSURF(n_jobs=N_JOBS), skrebate.MultiSURFstar(n_jobs=N_JOBS),
        # 36-54: imbalanced-learn samplers
        imblearn.under_sampling.ClusterCentroids(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.CondensedNearestNeighbour(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.EditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.RepeatedEditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.InstanceHardnessThreshold(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.NearMiss(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.NeighbourhoodCleaningRule(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.OneSidedSelection(random_state=0, n_jobs=N_JOBS),
        imblearn.under_sampling.RandomUnderSampler(random_state=0),
        imblearn.under_sampling.TomekLinks(random_state=0, n_jobs=N_JOBS),
        imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
        imblearn.over_sampling.RandomOverSampler(random_state=0),
        imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
        imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
        imblearn.over_sampling.BorderlineSMOTE(random_state=0, n_jobs=N_JOBS),
        imblearn.over_sampling.SMOTENC(categorical_features=[], random_state=0, n_jobs=N_JOBS),
        imblearn.combine.SMOTEENN(random_state=0), imblearn.combine.SMOTETomek(random_state=0),
    ]


# Named shortcuts and the slice of `_builtin_preprocessors()` each expands to.
# 'sk_prep_all' does not include KernelCenterer.
_PREPROCESSOR_GROUPS = (
    ('all_0', slice(0, 36)),
    ('sk_prep_all', slice(0, 8)),
    ('fs_all', slice(8, 15)),
    ('decomp_all', slice(15, 26)),
    ('k_appr_all', slice(26, 30)),
    ('reb_all', slice(31, 36)),
    ('imb_all', slice(36, 55)),
)


def _expand_preprocessors(candidates):
    """Expand user-supplied preprocessor candidates into a flat list.

    Each item of `candidates` may be None (kept as None), one of the
    shortcut names in `_PREPROCESSOR_GROUPS`, an integer index into the
    built-in list, or an object exposing `get_params` (a user-provided
    estimator, whose `n_jobs` is forced to `N_JOBS` when it has one).
    Anything else terminates the program via `sys.exit`.
    """
    preprocessors = _builtin_preprocessors()
    expanded = []
    for obj in candidates:
        if obj is None:
            expanded.append(None)
            continue

        group = None
        for name, span in _PREPROCESSOR_GROUPS:
            if obj == name:
                group = span
                break
        if group is not None:
            expanded.extend(preprocessors[group])
        # `type(...) is int` on purpose: booleans must not be accepted as indices.
        elif type(obj) is int and -1 < obj < len(preprocessors):
            expanded.append(preprocessors[obj])
        elif hasattr(obj, 'get_params'):  # user object
            if 'n_jobs' in obj.get_params():
                expanded.append(obj.set_params(n_jobs=N_JOBS))
            else:
                expanded.append(obj)
        else:
            # Wrap in a 1-tuple so that tuple-valued `obj` is reported
            # instead of breaking the %-formatting.
            sys.exit("Unsupported preprocessor type: %r" % (obj,))
    return expanded


def get_search_params(params_builder):
    """Translate the tool's search-parameter inputs into a parameter grid.

    Parameters
    ----------
    params_builder : dict
        Must contain 'param_set', an iterable of dicts each holding a
        'search_param_selector' dict with the keys 'search_p' (text of the
        form ``name: literal``) and 'selected_param_type'.

    Returns
    -------
    dict
        Maps grid keys (``estimator__<name>``, ``preprocessing_<n>__<name>``
        or ``preprocessing_<n>``) to the evaluated search values.

    Notes
    -----
    For every non-blank 'search_p':

    * ``name: literal`` -- the literal is evaluated with the numpy/scipy
      enabled `SafeEval` and stored under the parameter name.
    * ``name-: literal`` -- a trailing ``-`` marks the values as estimators;
      the literal is evaluated with the estimator-enabled `SafeEval`, each
      resulting estimator that has an `n_jobs` parameter gets `N_JOBS`, and
      the ``-`` is dropped from the stored name.
    * ``: literal`` -- no name; only allowed for preprocessing steps. The
      evaluated list is expanded by `_expand_preprocessors` and searched as
      the step itself.

    Invalid input (wrong number of colons, searching `n_jobs`, a nameless
    entry for the final estimator, an unsupported preprocessor) ends the
    program through `sys.exit`.
    """
    search_params = {}
    safe_eval = SafeEval(load_scipy=True, load_numpy=True)
    safe_eval_es = SafeEval(load_estimators=True)

    for p in params_builder['param_set']:
        selector = p['search_param_selector']
        search_p = selector['search_p']
        if search_p.strip() == '':
            continue
        param_type = selector['selected_param_type']

        lst = search_p.split(':')
        # Explicit check rather than `assert`: assertions vanish under
        # `python -O`, which would let malformed input through.
        if len(lst) != 2:
            sys.exit("Error, make sure there is one and only one colon in search parameter input.")
        param_name = lst[0].strip()
        literal = lst[1].strip()

        if not param_name:
            if param_type == 'final_estimator_p':
                sys.exit("Parameter name of the final estimator can't be skipped!")
            # TODO regular expression check ?
            ev = safe_eval_es(literal)
            search_params[_search_param_key(param_type)] = _expand_preprocessors(ev)
            continue

        if param_name.lower() == 'n_jobs':
            sys.exit("Parameter `%s` is invalid for search." % param_name)

        if param_name.endswith('-'):
            # only for estimator eval, add `-` to the end of param
            # TODO maybe add regular expression check
            ev = safe_eval_es(literal)
            for obj in ev:
                # Candidates such as None have no parameters to adjust.
                if hasattr(obj, 'get_params') and 'n_jobs' in obj.get_params():
                    obj.set_params(n_jobs=N_JOBS)
            param_name = param_name[:-1]
        else:
            ev = safe_eval(literal)

        search_params[_search_param_key(param_type, param_name)] = ev

    return search_params
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
131 | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
132 | 
| 
 
4368259ff821
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
 
bgruening 
parents:  
diff
changeset
 | 
if __name__ == '__main__':
    # Command-line entry point.
    #
    # Positional arguments:
    #   argv[1]  path to the JSON file holding the tool parameters
    #   argv[2]  path to the serialized pipeline/estimator to be searched
    #   argv[3]  path to the X input (tabular, or a Matrix Market file)
    #   argv[4]  path to the tabular input the y column(s) are read from
    #   argv[5]  path the cv_results_ table is written to (tab separated)
    #   argv[6]  optional path the best estimator is pickled to

    warnings.simplefilter('ignore')

    input_json_path = sys.argv[1]
    with open(input_json_path, 'r') as param_handler:
        params = json.load(param_handler)

    infile_pipeline = sys.argv[2]
    infile1 = sys.argv[3]
    infile2 = sys.argv[4]
    outfile_result = sys.argv[5]
    outfile_estimator = sys.argv[6] if len(sys.argv) > 6 else None

    params_builder = params['search_schemes']['search_params_builder']

    # Column selector options that need an explicit column specification.
    options_with_columns = (
        'by_index_number',
        'all_but_by_index_number',
        'by_header_name',
        'all_but_by_header_name',
    )

    # ---- Read X ----
    input_type = params['input_options']['selected_input']
    if input_type == 'tabular':
        header = 'infer' if params['input_options']['header1'] else None
        selector_1 = params['input_options']['column_selector_options_1']
        column_option = selector_1['selected_column_selector_option']
        if column_option in options_with_columns:
            c = selector_1['col1']
        else:
            c = None
        X = read_columns(
            infile1,
            c=c,
            c_option=column_option,
            sep='\t',
            header=header,
            parse_dates=True
        )
    else:
        # Close the handle deterministically instead of leaking it.
        with open(infile1, 'r') as matrix_handler:
            X = mmread(matrix_handler)

    # ---- Read y ----
    header = 'infer' if params['input_options']['header2'] else None
    selector_2 = params['input_options']['column_selector_options_2']
    column_option = selector_2['selected_column_selector_option2']
    if column_option in options_with_columns:
        c = selector_2['col2']
    else:
        c = None
    y = read_columns(
        infile2,
        c=c,
        c_option=column_option,
        sep='\t',
        header=header,
        parse_dates=True
    )
    y = y.ravel()

    # ---- Build the search object ----
    # The search scheme is looked up by name on sklearn's model_selection.
    optimizer = params['search_schemes']['selected_search_scheme']
    optimizer = getattr(model_selection, optimizer)

    # The remaining entries of `options` are passed to the optimizer as
    # keyword arguments, so each one is normalized in place below.
    options = params['search_schemes']['options']
    splitter, groups = get_cv(options.pop('cv_selector'))
    if groups is None:
        options['cv'] = splitter
    elif groups == '':
        options['cv'] = list(splitter.split(X, y, groups=None))
    else:
        options['cv'] = list(splitter.split(X, y, groups=groups))
    options['n_jobs'] = N_JOBS
    # Remember the primary metric name before 'scoring' is replaced; it is
    # used to rename the result columns at the end.
    primary_scoring = options['scoring']['primary_scoring']
    options['scoring'] = get_scoring(options['scoring'])
    raise_on_error = bool(options['error_score'])
    # np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0.
    options['error_score'] = 'raise' if raise_on_error else np.nan
    if options['refit'] and isinstance(options['scoring'], dict):
        options['refit'] = 'primary'
    if 'pre_dispatch' in options and options['pre_dispatch'] == '':
        options['pre_dispatch'] = None

    with open(infile_pipeline, 'rb') as pipeline_handler:
        pipeline = load_model(pipeline_handler)

    search_params = get_search_params(params_builder)
    searcher = optimizer(pipeline, search_params, **options)

    # ---- Fit ----
    if raise_on_error:
        searcher.fit(X, y)
    else:
        fit_error = None
        with warnings.catch_warnings(record=True) as w:
            # Set the filter inside the context so the process-wide warning
            # configuration is restored once the fit is done.
            warnings.simplefilter('always', FitFailedWarning)
            try:
                searcher.fit(X, y)
            except ValueError as e:
                # Best effort: keep going so the recorded warnings are still
                # reported, but do not hide the failure itself.
                fit_error = e
            for warning in w:
                print(repr(warning.message))
        if fit_error is not None:
            sys.stderr.write("Search fit raised: %r\n" % (fit_error,))
            if not hasattr(searcher, 'cv_results_'):
                # Without cv_results_ nothing can be written; fail with a
                # clear message instead of an AttributeError further down.
                sys.exit("Search failed, no results available: %r"
                         % (fit_error,))

    # ---- Write results ----
    cv_result = pandas.DataFrame(searcher.cv_results_)
    # Replace the 'primary' placeholder in the column names with the name of
    # the primary metric.
    cv_result.rename(inplace=True, columns={
        'mean_test_primary': 'mean_test_' + primary_scoring,
        'rank_test_primary': 'rank_test_' + primary_scoring,
    })
    cv_result.to_csv(path_or_buf=outfile_result, sep='\t', header=True,
                     index=False)

    if outfile_estimator:
        with open(outfile_estimator, 'wb') as output_handler:
            pickle.dump(searcher.best_estimator_, output_handler,
                        pickle.HIGHEST_PROTOCOL)
