Mercurial > repos > bgruening > sklearn_ensemble
annotate utils.py @ 23:39ae276e75d9 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
author | bgruening |
---|---|
date | Sun, 30 Dec 2018 01:56:11 -0500 |
parents | 9ce3e347506c |
children | e94395c672bd |
rev | line source |
---|---|
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
1 import json |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
2 import numpy as np |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
3 import os |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
4 import pandas |
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
5 import pickle |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
6 import re |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
7 import scipy |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
8 import sklearn |
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
9 import sys |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
10 import warnings |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
11 import xgboost |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
12 |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
13 from asteval import Interpreter, make_symbol_table |
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
14 from sklearn import (cluster, compose, decomposition, ensemble, feature_extraction, |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
15 feature_selection, gaussian_process, kernel_approximation, metrics, |
20
038cecaa9e7c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8cf3d813ec755166ee0bd517b4ecbbd4f84d4df1
bgruening
parents:
19
diff
changeset
|
16 model_selection, naive_bayes, neighbors, pipeline, preprocessing, |
038cecaa9e7c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8cf3d813ec755166ee0bd517b4ecbbd4f84d4df1
bgruening
parents:
19
diff
changeset
|
17 svm, linear_model, tree, discriminant_analysis) |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
18 |
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
19 try: |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
20 import skrebate |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
21 except ModuleNotFoundError: |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
22 pass |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
23 |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
24 |
21
9ce3e347506c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
# Number of parallel jobs handed to estimators that accept ``n_jobs``.
# Read from the GALAXY_SLOTS environment variable; defaults to 1 when unset.
_galaxy_slots = os.getenv('GALAXY_SLOTS', 1)
N_JOBS = int(_galaxy_slots)

# Keep an ``sk_whitelist`` that already exists in this namespace; otherwise
# start with None so that SafePickler.find_class loads it from
# sk_whitelist.json on first use.
try:
    sk_whitelist
except NameError:
    sk_whitelist = None
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
31 |
21
9ce3e347506c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
32 |
9ce3e347506c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
class SafePickler(pickle.Unpickler):
    """
    Restricted unpickler for scikit-learn style model objects.

    ``find_class`` only resolves globals whose module belongs to one of the
    allowed packages (sklearn, xgboost, skrebate, imblearn, numpy) or whose
    full dotted name is listed in ``_GOOD_NAMES``. Every other global raises
    ``pickle.UnpicklingError``.

    Usage:
        eg.: SafePickler(pickled_file_object).load()

    NOTE(review): restricting globals narrows, but may not eliminate, the
    risk of unpickling untrusted data -- confirm the whitelist is sufficient.
    """

    # Attribute/keyword names that are never resolved, even when the module
    # itself is allowed.
    _BAD_NAMES = frozenset((
        'and', 'as', 'assert', 'break', 'class', 'continue',
        'def', 'del', 'elif', 'else', 'except', 'exec',
        'finally', 'for', 'from', 'global', 'if', 'import',
        'in', 'is', 'lambda', 'not', 'or', 'pass', 'print',
        'raise', 'return', 'try', 'system', 'while', 'with',
        'True', 'False', 'None', 'eval', 'execfile', '__import__',
        '__package__', '__subclasses__', '__bases__', '__globals__',
        '__code__', '__closure__', '__func__', '__self__', '__module__',
        '__dict__', '__class__', '__call__', '__get__',
        '__getattribute__', '__subclasshook__', '__new__',
        '__init__', 'func_globals', 'func_code', 'func_closure',
        'im_class', 'im_func', 'im_self', 'gi_code', 'gi_frame',
        '__asteval__', 'f_locals', '__mro__',
    ))

    # Full names that are always accepted, regardless of their module.
    # NOTE(review): these look like Python 2 module names; confirm they can
    # still be found in sys.modules on the interpreter in use.
    _GOOD_NAMES = ('copy_reg._reconstructor', '__builtin__.object')

    # A module is allowed when it is one of these packages or lives below one.
    # The trailing dot keeps look-alike modules (e.g. 'imblearn_evil') out.
    _ALLOWED_MODULES = ('imblearn', 'numpy')
    _ALLOWED_PREFIXES = ('sklearn.', 'xgboost.', 'skrebate.', 'imblearn.', 'numpy.')

    # Sections of sk_whitelist.json consulted for the "not whitelisted" warning.
    _WHITELIST_KEYS = ('SK_NAMES', 'SKR_NAMES', 'XGB_NAMES',
                       'NUMPY_NAMES', 'IMBLEARN_NAMES')

    _IDENTIFIER = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')

    def find_class(self, module, name):
        """
        Resolve the global ``module.name`` referenced by the pickle stream.

        Parameters
        ----------
        module : str
            Module name recorded in the pickle.
        name : str
            Attribute name recorded in the pickle.

        Returns
        -------
        The attribute ``name`` of the already imported module ``module``.

        Raises
        ------
        pickle.UnpicklingError
            If ``name`` is not a plain identifier, is a forbidden name, or
            ``module`` is outside the allowed packages.
        KeyError
            If an allowed module has not been imported (it is looked up in
            ``sys.modules`` and never imported on demand).
        """
        # sk_whitelist could be read from tool
        global sk_whitelist
        if not sk_whitelist:
            whitelist_file = os.path.join(os.path.dirname(__file__), 'sk_whitelist.json')
            with open(whitelist_file, 'r') as f:
                sk_whitelist = json.load(f)

        # Built before any check so the error below can always report it.
        fullname = module + '.' + name

        if self._IDENTIFIER.match(name):
            is_good_name = fullname in self._GOOD_NAMES
            in_allowed_package = (module in self._ALLOWED_MODULES
                                  or module.startswith(self._ALLOWED_PREFIXES))

            if is_good_name or (in_allowed_package and name not in self._BAD_NAMES):
                # TODO: replace with a whitelist checker
                whitelisted = is_good_name or any(
                    fullname in sk_whitelist[key] for key in self._WHITELIST_KEYS)
                if not whitelisted:
                    print("Warning: global %s is not in pickler whitelist yet and will loss support soon. Contact tool author or leave a message at github.com" % fullname)
                # Only modules that are already imported can be resolved.
                mod = sys.modules[module]
                return getattr(mod, name)

        raise pickle.UnpicklingError("global '%s' is forbidden" % fullname)
038cecaa9e7c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8cf3d813ec755166ee0bd517b4ecbbd4f84d4df1
bgruening
parents:
19
diff
changeset
|
82 |
21
9ce3e347506c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
83 |
9ce3e347506c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
def load_model(file):
    """
    Deserialize and return the object stored in ``file`` using SafePickler.

    ``file`` is the file object handed to the ``SafePickler`` constructor.
    """
    unpickler = SafePickler(file)
    model = unpickler.load()
    return model
9ce3e347506c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
86 |
20
038cecaa9e7c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8cf3d813ec755166ee0bd517b4ecbbd4f84d4df1
bgruening
parents:
19
diff
changeset
|
87 |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
def read_columns(f, c=None, c_option='by_index_number', return_df=False, **args):
    """
    Read a table with ``pandas.read_csv`` and return the selected columns.

    Parameters
    ----------
    f :
        Path or file-like object passed to ``pandas.read_csv``.
    c :
        Column selection. For the ``*by_index_number`` options an iterable of
        1-based column numbers; for the ``*by_header_name`` options a
        comma-separated string of header names.
    c_option : str
        'by_index_number'          keep the numbered columns
        'all_but_by_index_number'  drop the numbered columns
        'by_header_name'           keep the named columns
        'all_but_by_header_name'   drop the named columns
        Any other value leaves the table unfiltered.
    return_df : bool
        When true, also return the filtered DataFrame.
    **args :
        Extra keyword arguments forwarded to ``pandas.read_csv``.

    Returns
    -------
    The selected values as an array (``DataFrame.values``), or the tuple
    ``(values, DataFrame)`` when ``return_df`` is true.

    Raises
    ------
    ValueError
        If a selecting ``c_option`` is used without ``c``, or a column number
        is smaller than 1.
    """
    data = pandas.read_csv(f, **args)

    index_options = ('by_index_number', 'all_but_by_index_number')
    name_options = ('by_header_name', 'all_but_by_header_name')

    if c is None and c_option in index_options + name_options:
        raise ValueError("c must be given when c_option is '%s'" % c_option)

    if c_option in index_options:
        numbers = list(c)
        # Column numbers are 1-based; 0 or a negative number would otherwise
        # wrap around and silently pick columns from the end of the table.
        if any(number < 1 for number in numbers):
            raise ValueError("column numbers are 1-based, got %r" % (numbers,))
        cols = [number - 1 for number in numbers]
        if c_option == 'by_index_number':
            data = data.iloc[:, cols]
        else:
            data = data.drop(data.columns[cols], axis=1)
    elif c_option in name_options:
        cols = [e.strip() for e in c.split(',')]
        if c_option == 'by_header_name':
            data = data[cols]
        else:
            data = data.drop(cols, axis=1)

    y = data.values
    if return_df:
        return y, data
    return y
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
107 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
108 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
109 ## generate an instance for one of sklearn.feature_selection classes |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
110 def feature_selector(inputs): |
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
111 selector = inputs['selected_algorithm'] |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
112 selector = getattr(sklearn.feature_selection, selector) |
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
113 options = inputs['options'] |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
114 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
115 if inputs['selected_algorithm'] == 'SelectFromModel': |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
116 if not options['threshold'] or options['threshold'] == 'None': |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
117 options['threshold'] = None |
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
118 else: |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
119 try: |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
120 options['threshold'] = float(options['threshold']) |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
121 except ValueError: |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
122 pass |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
123 if inputs['model_inputter']['input_mode'] == 'prefitted': |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
124 model_file = inputs['model_inputter']['fitted_estimator'] |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
125 with open(model_file, 'rb') as model_handler: |
21
9ce3e347506c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
126 fitted_estimator = load_model(model_handler) |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
127 new_selector = selector(fitted_estimator, prefit=True, **options) |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
128 else: |
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
129 estimator_json = inputs['model_inputter']['estimator_selector'] |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
130 estimator = get_estimator(estimator_json) |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
131 new_selector = selector(estimator, **options) |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
132 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
133 elif inputs['selected_algorithm'] == 'RFE': |
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
134 estimator = get_estimator(inputs['estimator_selector']) |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
135 step = options.get('step', None) |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
136 if step and step >= 1.0: |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
137 options['step'] = int(step) |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
138 new_selector = selector(estimator, **options) |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
139 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
140 elif inputs['selected_algorithm'] == 'RFECV': |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
141 options['scoring'] = get_scoring(options['scoring']) |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
142 options['n_jobs'] = N_JOBS |
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
143 splitter, groups = get_cv(options.pop('cv_selector')) |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
144 # TODO support group cv splitters |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
145 options['cv'] = splitter |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
146 step = options.get('step', None) |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
147 if step and step >= 1.0: |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
148 options['step'] = int(step) |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
149 estimator = get_estimator(inputs['estimator_selector']) |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
150 new_selector = selector(estimator, **options) |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
151 |
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
152 elif inputs['selected_algorithm'] == 'VarianceThreshold': |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
153 new_selector = selector(**options) |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
154 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
155 else: |
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
156 score_func = inputs['score_func'] |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
157 score_func = getattr(sklearn.feature_selection, score_func) |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
158 new_selector = selector(score_func, **options) |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
159 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
160 return new_selector |
21
9ce3e347506c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
161 |
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
162 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
def get_X_y(params, file1, file2):
    """Load the feature data ``X`` and the flattened target ``y``.

    Parameters
    ----------
    params : dict
        Tool parameters; only
        ``params['selected_tasks']['selected_algorithms']['input_options']``
        is consulted here.
    file1 : str
        Path of the feature file. Read with ``read_columns`` when the
        selected input type is ``'tabular'``, otherwise with ``mmread``
        (presumably a sparse Matrix Market file -- confirm against the tool
        wrapper).
    file2 : str
        Path of the tab-separated target file, always read with
        ``read_columns``.

    Returns
    -------
    tuple
        ``(X, y)`` where ``y`` has been passed through ``ravel()``.
    """
    input_options = params['selected_tasks']['selected_algorithms']['input_options']
    # Column-selection modes that come with an explicit column list.
    explicit_modes = ('by_index_number', 'all_but_by_index_number',
                      'by_header_name', 'all_but_by_header_name')

    # --- features -----------------------------------------------------
    if input_options['selected_input'] != 'tabular':
        X = mmread(file1)
    else:
        x_header = 'infer' if input_options['header1'] else None
        x_selector = input_options['column_selector_options_1']
        x_mode = x_selector['selected_column_selector_option']
        x_columns = x_selector['col1'] if x_mode in explicit_modes else None
        X = read_columns(
            file1,
            c=x_columns,
            c_option=x_mode,
            sep='\t',
            header=x_header,
            parse_dates=True
        )

    # --- targets ------------------------------------------------------
    y_header = 'infer' if input_options['header2'] else None
    y_selector = input_options['column_selector_options_2']
    y_mode = y_selector['selected_column_selector_option2']
    y_columns = y_selector['col2'] if y_mode in explicit_modes else None
    targets = read_columns(
        file2,
        c=y_columns,
        c_option=y_mode,
        sep='\t',
        header=y_header,
        parse_dates=True
    )

    return X, targets.ravel()
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
199 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
200 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
class SafeEval(Interpreter):
    """Expression interpreter restricted to a curated symbol table.

    Starts from the symbol table produced by ``make_symbol_table`` (numpy
    support disabled), adds a few whitelisted callables, optionally adds
    scipy distributions, ``numpy.random`` members and estimator modules, and
    finally removes a handful of builtins that expressions should not reach.
    Control-flow statements (``if``, ``for``, ``while``, ``try``, function
    definitions, ...) are disabled through the ``no_*`` interpreter flags.
    """

    def __init__(self, load_scipy=False, load_numpy=False, load_estimators=False):
        """Assemble the symbol table and initialise the interpreter.

        Parameters
        ----------
        load_scipy : bool
            Expose every ``rv_continuous`` / ``rv_discrete`` instance found
            in ``scipy.stats.distributions`` as ``scipy_stats_<name>``.
        load_numpy : bool
            Expose the listed ``numpy.random`` members as
            ``np_random_<name>``. Names missing from the installed numpy are
            skipped.
        load_estimators : bool
            Expose selected ``sklearn`` submodules (``sklearn_<module>``)
            plus the listed ``skrebate`` and ``xgboost`` estimators.
        """

        # File opening and other unneeded functions could be dropped
        unwanted = ['open', 'type', 'dir', 'id', 'str', 'repr']

        # Allowed symbol table. Add more if needed.
        new_syms = {
            'np_arange': np.arange,
            'ensemble_ExtraTreesClassifier': ensemble.ExtraTreesClassifier
        }

        syms = make_symbol_table(use_numpy=False, **new_syms)

        if load_scipy:
            scipy_distributions = scipy.stats.distributions.__dict__
            for k, v in scipy_distributions.items():
                # Only distribution *instances* are exposed, not the module's
                # helper functions or classes.
                if isinstance(v, (scipy.stats.rv_continuous, scipy.stats.rv_discrete)):
                    syms['scipy_stats_' + k] = v

        if load_numpy:
            from_numpy_random = ['beta', 'binomial', 'bytes', 'chisquare', 'choice', 'dirichlet', 'division',
                                 'exponential', 'f', 'gamma', 'geometric', 'gumbel', 'hypergeometric',
                                 'laplace', 'logistic', 'lognormal', 'logseries', 'mtrand', 'multinomial',
                                 'multivariate_normal', 'negative_binomial', 'noncentral_chisquare', 'noncentral_f',
                                 'normal', 'pareto', 'permutation', 'poisson', 'power', 'rand', 'randint',
                                 'randn', 'random', 'random_integers', 'random_sample', 'ranf', 'rayleigh',
                                 'sample', 'seed', 'set_state', 'shuffle', 'standard_cauchy', 'standard_exponential',
                                 'standard_gamma', 'standard_normal', 'standard_t', 'triangular', 'uniform',
                                 'vonmises', 'wald', 'weibull', 'zipf']
            for f in from_numpy_random:
                # The list was captured from one specific numpy release; not
                # every entry (e.g. 'division') exists in every version.
                # Skip absent names instead of failing with AttributeError
                # while constructing the interpreter.
                if hasattr(np.random, f):
                    syms['np_random_' + f] = getattr(np.random, f)

        if load_estimators:
            # NOTE(review): attribute access on ``sklearn`` assumes these
            # submodules were already imported elsewhere in the file.
            estimator_table = {
                'sklearn_svm': sklearn.svm,
                'sklearn_tree': sklearn.tree,
                'sklearn_ensemble': sklearn.ensemble,
                'sklearn_neighbors': sklearn.neighbors,
                'sklearn_naive_bayes': sklearn.naive_bayes,
                'sklearn_linear_model': sklearn.linear_model,
                'sklearn_cluster': sklearn.cluster,
                'sklearn_decomposition': sklearn.decomposition,
                'sklearn_preprocessing': sklearn.preprocessing,
                'sklearn_feature_selection': sklearn.feature_selection,
                'sklearn_kernel_approximation': sklearn.kernel_approximation,
                'skrebate_ReliefF': skrebate.ReliefF,
                'skrebate_SURF': skrebate.SURF,
                'skrebate_SURFstar': skrebate.SURFstar,
                'skrebate_MultiSURF': skrebate.MultiSURF,
                'skrebate_MultiSURFstar': skrebate.MultiSURFstar,
                'skrebate_TuRF': skrebate.TuRF,
                'xgboost_XGBClassifier': xgboost.XGBClassifier,
                'xgboost_XGBRegressor': xgboost.XGBRegressor
            }
            syms.update(estimator_table)

        # Strip the unwanted builtins last so none of the additions above
        # can re-introduce them under the same name.
        for key in unwanted:
            syms.pop(key, None)

        super(SafeEval, self).__init__(symtable=syms, use_numpy=False, minimal=False,
                                       no_if=True, no_for=True, no_while=True, no_try=True,
                                       no_functiondef=True, no_ifexp=True, no_listcomp=False,
                                       no_augassign=False, no_assert=True, no_delete=True,
                                       no_raise=True, no_print=True)
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
267 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
268 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
269 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
def get_estimator(estimator_json):
    """Build an estimator object from its JSON description.

    Keys read from ``estimator_json``:

    * ``selected_module`` -- ``'customer_estimator'``, ``'xgboost'`` or the
      name of an attribute of the ``sklearn`` package.
    * ``c_estimator`` -- path of a stored model; only read when
      ``selected_module`` is ``'customer_estimator'``.
    * ``selected_estimator`` -- name of the class to look up in the module.
    * ``text_params`` -- keyword arguments as text (may be blank); they are
      evaluated as ``dict(<text_params>)`` through ``safe_eval``.

    Returns the loaded model for ``'customer_estimator'``, otherwise a new
    instance of the selected class with the parsed parameters applied.
    Exits the process via ``sys.exit`` when ``safe_eval`` raises
    ``ValueError`` on the parameter text.
    """
    module_name = estimator_json['selected_module']

    # A user-supplied model is loaded as-is; no parameters are applied to it.
    if module_name == 'customer_estimator':
        with open(estimator_json['c_estimator'], 'rb') as model_handler:
            return load_model(model_handler)

    class_name = estimator_json['selected_estimator']

    if module_name == 'xgboost':
        namespace = xgboost
    else:
        namespace = getattr(sklearn, module_name)
    estimator = getattr(namespace, class_name)()

    text_params = estimator_json['text_params'].strip()
    if text_params:
        try:
            parsed = safe_eval('dict(' + text_params + ')')
        except ValueError:
            sys.exit("Unsupported parameter input: `%s`" % text_params)
        estimator.set_params(**parsed)

    # The module-level N_JOBS always wins over any user-supplied n_jobs.
    if 'n_jobs' in estimator.get_params():
        estimator.set_params(n_jobs=N_JOBS)

    return estimator
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
301 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
302 |
23
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
def _parse_int_list(text):
    """Turn a comma-separated string of integers into a list of ints.

    A leading ``__ob__`` and a trailing ``__cb__`` marker are removed first
    (presumably the sanitized forms of ``[`` and ``]`` -- TODO confirm
    against the caller that produces these strings).
    """
    text = text.strip()
    if text.startswith('__ob__'):
        text = text[6:]
    if text.endswith('__cb__'):
        text = text[:-6]
    return [int(item.strip()) for item in text.split(',')]


def get_cv(cv_json):
    """Build a cross-validation splitter from its JSON description.

    cv_json:
        e.g.:
        {
            'selected_cv': 'StratifiedKFold',
            'n_splits': 3,
            'shuffle': True,
            'random_state': 0
        }

    ``selected_cv`` names a class in ``model_selection``; the remaining
    entries (except ``groups``) are passed to that class as keyword
    arguments, with empty-string values replaced by ``None``.

    Returns a ``(splitter, groups)`` tuple:

    * when ``selected_cv`` is ``'default'``: ``(cv_json['n_splits'], None)``;
    * otherwise: the instantiated splitter, and ``groups`` parsed into a list
      of ints when it is a non-blank string (a missing, empty or blank
      ``groups`` value is returned unparsed).

    The input dict is not modified, so the same ``cv_json`` can be passed
    more than once.
    """
    # Work on a shallow copy: the pops and rewrites below must not leak
    # into the caller's dict.
    params = dict(cv_json)

    cv = params.pop('selected_cv')
    if cv == 'default':
        return params['n_splits'], None

    groups = params.pop('groups', None)
    if groups:
        groups = groups.strip()
        if groups != '':
            groups = _parse_int_list(groups)

    # Empty strings stand for "not set".
    for key, value in list(params.items()):
        if value == '':
            params[key] = None

    test_fold = params.get('test_fold', None)
    if test_fold:
        params['test_fold'] = _parse_int_list(test_fold)

    # Values above 1.0 are truncated to an int; values up to 1.0 are
    # passed through unchanged.
    test_size = params.get('test_size', None)
    if test_size and test_size > 1.0:
        params['test_size'] = int(test_size)

    cv_class = getattr(model_selection, cv)
    splitter = cv_class(**params)

    return splitter, groups
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
348 |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
349 |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
350 # needed when sklearn < v0.20 |
39ae276e75d9
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
def balanced_accuracy_score(y_true, y_pred):
    """Return the mean of the per-class ratios ``diag(C) / row_sum(C)``.

    ``C`` is ``metrics.confusion_matrix(y_true, y_pred)``. Each ratio is the
    count on the diagonal divided by the total of that row (per-class
    recall, assuming rows index the true labels as in scikit-learn's
    confusion matrix).

    Classes whose row sums to zero give a NaN ratio; those are dropped from
    the mean and a warning is issued.
    """
    C = metrics.confusion_matrix(y_true, y_pred)
    with np.errstate(divide='ignore', invalid='ignore'):
        # true_divide keeps the ratios fractional even where `/` on integer
        # arrays would floor (Python 2 without
        # `from __future__ import division`).
        per_class = np.true_divide(np.diag(C), C.sum(axis=1))
    undefined = np.isnan(per_class)
    if np.any(undefined):
        warnings.warn('y_pred contains classes not in y_true')
        per_class = per_class[~undefined]
    score = np.mean(per_class)
    return score
19
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
360 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
361 |
4570575d060c
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
def get_scoring(scoring_json):
    """Resolve the scoring selection into scorer object(s).

    Keys read from ``scoring_json``:

    * ``primary_scoring`` -- a key of ``metrics.SCORERS`` or ``'default'``.
    * ``secondary_scoring`` -- the string ``'None'`` or a comma-separated
      list of ``metrics.SCORERS`` keys (not read when the primary is
      ``'default'``).

    Returns ``None`` for ``'default'``; a single scorer when there is no
    secondary selection or it equals the primary; otherwise a dict holding
    the primary scorer under ``'primary'`` and each other secondary scorer
    under its own name.

    Side effect: registers a ``'balanced_accuracy'`` entry in
    ``metrics.SCORERS`` when that key is missing.
    """
    primary = scoring_json['primary_scoring']
    if primary == 'default':
        return None

    my_scorers = metrics.SCORERS
    if 'balanced_accuracy' not in my_scorers:
        my_scorers['balanced_accuracy'] = metrics.make_scorer(balanced_accuracy_score)

    secondary = scoring_json['secondary_scoring']
    if secondary == 'None' or secondary == primary:
        return my_scorers[primary]

    scoring = {'primary': my_scorers[primary]}
    for name in secondary.split(','):
        # The primary scorer is already present under the 'primary' key.
        if name != primary:
            scoring[name] = my_scorers[name]
    return scoring