Mercurial > repos > bgruening > sklearn_numeric_clustering
annotate utils.py @ 21:1d6d938e7ee2 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
author | bgruening |
---|---|
date | Sat, 29 Sep 2018 07:29:32 -0400 |
parents | e6b45e6447fc |
children | 9d234733ccfd |
rev | line source |
---|---|
19
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
1 import sys |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
2 import os |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
3 import pandas |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
4 import re |
21
1d6d938e7ee2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
5 import pickle |
19
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
6 import warnings |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
7 import numpy as np |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
8 import xgboost |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
9 import scipy |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
10 import sklearn |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
11 from asteval import Interpreter, make_symbol_table |
20
e6b45e6447fc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8cf3d813ec755166ee0bd517b4ecbbd4f84d4df1
bgruening
parents:
19
diff
changeset
|
12 from sklearn import (cluster, decomposition, ensemble, feature_extraction, feature_selection, |
21
1d6d938e7ee2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
13 gaussian_process, kernel_approximation, metrics, |
20
e6b45e6447fc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8cf3d813ec755166ee0bd517b4ecbbd4f84d4df1
bgruening
parents:
19
diff
changeset
|
14 model_selection, naive_bayes, neighbors, pipeline, preprocessing, |
e6b45e6447fc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8cf3d813ec755166ee0bd517b4ecbbd4f84d4df1
bgruening
parents:
19
diff
changeset
|
15 svm, linear_model, tree, discriminant_analysis) |
19
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
16 |
21
1d6d938e7ee2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
# Number of parallel jobs to use, read from the GALAXY_SLOTS environment
# variable; falls back to 1 when the variable is not set.
N_JOBS = int(os.getenv('GALAXY_SLOTS', 1))
19
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
18 |
21
1d6d938e7ee2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
19 |
1d6d938e7ee2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
class SafePickler(pickle.Unpickler):
    """
    Unpickler that only resolves globals from an allow-listed set of modules.

    Used to deserialize scikit-learn model objects serialized by cPickle.dump.
    Usage:
        eg.: SafePickler(pickled_file_object).load()

    NOTE(review): restricting ``find_class`` narrows what a pickle may
    reference, but model files should still be treated as untrusted input.
    """

    def find_class(self, module, name):
        """
        Resolve the global ``module.name`` requested by the pickle stream.

        A global is resolved when ``name`` is a plain identifier and either
        the fully-qualified name is in ``good_names``, or ``module`` is
        ``numpy`` / lives under ``sklearn.``, ``xgboost.``, ``skrebate.`` or
        ``numpy.`` and ``name`` is not in ``bad_names``.

        Returns the attribute ``name`` of the already-imported ``module``
        (looked up in ``sys.modules``).

        Raises ``pickle.UnpicklingError`` for every other global.
        """
        bad_names = ('and', 'as', 'assert', 'break', 'class', 'continue',
                     'def', 'del', 'elif', 'else', 'except', 'exec',
                     'finally', 'for', 'from', 'global', 'if', 'import',
                     'in', 'is', 'lambda', 'not', 'or', 'pass', 'print',
                     'raise', 'return', 'try', 'system', 'while', 'with',
                     'True', 'False', 'None', 'eval', 'execfile', '__import__',
                     '__package__', '__subclasses__', '__bases__', '__globals__',
                     '__code__', '__closure__', '__func__', '__self__', '__module__',
                     '__dict__', '__class__', '__call__', '__get__',
                     '__getattribute__', '__subclasshook__', '__new__',
                     '__init__', 'func_globals', 'func_code', 'func_closure',
                     'im_class', 'im_func', 'im_self', 'gi_code', 'gi_frame',
                     '__asteval__', 'f_locals', '__mro__')
        good_names = ['copy_reg._reconstructor', '__builtin__.object']
        allowed_prefixes = ('sklearn.', 'xgboost.', 'skrebate.', 'numpy.')

        # Build the qualified name before any check: the error raised at the
        # bottom must be able to report it even when `name` is not a valid
        # identifier (previously `fullname` was unbound on that path and the
        # method failed with UnboundLocalError instead of UnpicklingError).
        fullname = module + '.' + name

        if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
            module_allowed = (module == 'numpy'
                              or module.startswith(allowed_prefixes))
            if fullname in good_names or (module_allowed
                                          and name not in bad_names):
                # TODO: replace with a whitelist checker
                if fullname not in sk_whitelist['SK_NAMES'] + sk_whitelist['SKR_NAMES'] + sk_whitelist['XGB_NAMES'] + sk_whitelist['NUMPY_NAMES'] + good_names:
                    print("Warning: global %s is not in pickler whitelist yet and will loss support soon. Contact tool author or leave a message at github.com" % fullname)
                # Only modules that are already imported can be resolved.
                mod = sys.modules[module]
                return getattr(mod, name)

        raise pickle.UnpicklingError("global '%s' is forbidden" % fullname)
e6b45e6447fc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8cf3d813ec755166ee0bd517b4ecbbd4f84d4df1
bgruening
parents:
19
diff
changeset
|
61 |
21
1d6d938e7ee2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
62 |
1d6d938e7ee2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
def load_model(file):
    """Deserialize and return the object pickled in the open file object `file`, using SafePickler."""
    unpickler = SafePickler(file)
    return unpickler.load()
1d6d938e7ee2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
65 |
20
e6b45e6447fc
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8cf3d813ec755166ee0bd517b4ecbbd4f84d4df1
bgruening
parents:
19
diff
changeset
|
66 |
19
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
def read_columns(f, c=None, c_option='by_index_number', return_df=False, **args):
    """
    Read a delimited file with pandas and return the selected columns.

    Parameters
    ----------
    f : path or file-like object
        Passed unchanged to ``pandas.read_csv``.
    c : list of int or str, optional
        Column selection. For the ``*by_index_number`` options, an iterable
        of 1-based column indices; for the ``*by_header_name`` options, a
        comma-separated string of header names.
    c_option : str
        One of ``'by_index_number'``, ``'all_but_by_index_number'``,
        ``'by_header_name'``, ``'all_but_by_header_name'``. Any other value
        keeps every column.
    return_df : bool
        When true, also return the filtered DataFrame.
    **args
        Extra keyword arguments forwarded to ``pandas.read_csv``.

    Returns
    -------
    The selected data as ``data.values``, or the tuple ``(values, data)``
    when ``return_df`` is true.
    """
    data = pandas.read_csv(f, **args)

    # The four selection modes are mutually exclusive, so dispatch once.
    if c_option in ('by_index_number', 'all_but_by_index_number'):
        # Indices arrive 1-based; pandas positions are 0-based.
        cols = [index - 1 for index in c]
        if c_option == 'by_index_number':
            data = data.iloc[:, cols]
        else:
            data = data.drop(data.columns[cols], axis=1)
    elif c_option in ('by_header_name', 'all_but_by_header_name'):
        cols = [e.strip() for e in c.split(',')]
        if c_option == 'by_header_name':
            data = data[cols]
        else:
            data = data.drop(cols, axis=1)

    y = data.values
    if return_df:
        return y, data
    return y
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
87 |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
88 |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
89 ## generate an instance for one of sklearn.feature_selection classes |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
def feature_selector(inputs):
    """
    Build an instance of one of the sklearn.feature_selection classes.

    `inputs` is a dict; the keys read here are "selected_algorithm" (name of
    the class in sklearn.feature_selection), "options" (keyword arguments for
    that class) and, depending on the algorithm, "model_inputter",
    "estimator_selector" or "score_func".

    Note that `inputs["options"]` is updated in place for the
    SelectFromModel and RFECV algorithms.

    Returns the constructed selector object.
    """
    algorithm = inputs["selected_algorithm"]
    selector_cls = getattr(sklearn.feature_selection, algorithm)
    options = inputs["options"]

    if algorithm == 'SelectFromModel':
        # An empty value or the literal string 'None' both mean "no threshold".
        threshold = options['threshold']
        if not threshold or threshold == 'None':
            options['threshold'] = None

        if inputs['model_inputter']['input_mode'] == 'prefitted':
            model_path = inputs['model_inputter']['fitted_estimator']
            with open(model_path, 'rb') as model_handler:
                fitted_estimator = load_model(model_handler)
            return selector_cls(fitted_estimator, prefit=True, **options)

        estimator = get_estimator(inputs['model_inputter']["estimator_selector"])
        return selector_cls(estimator, **options)

    if algorithm == 'RFE':
        estimator = get_estimator(inputs["estimator_selector"])
        return selector_cls(estimator, **options)

    if algorithm == 'RFECV':
        # Convert the raw option values into the objects the class expects.
        options['scoring'] = get_scoring(options['scoring'])
        options['n_jobs'] = N_JOBS
        options['cv'] = get_cv(options['cv'].strip())
        estimator = get_estimator(inputs["estimator_selector"])
        return selector_cls(estimator, **options)

    if algorithm == "VarianceThreshold":
        return selector_cls(**options)

    # Every remaining algorithm takes a scoring function, looked up by name
    # in sklearn.feature_selection, as its first argument.
    score_func = getattr(sklearn.feature_selection, inputs["score_func"])
    return selector_cls(score_func, **options)
21
1d6d938e7ee2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
128 |
19
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
129 |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
def get_X_y(params, file1, file2):
    """Read the feature matrix ``X`` and the target vector ``y``.

    Parameters
    ----------
    params : dict
        Nested tool parameters; everything used here lives under
        ``params["selected_tasks"]["selected_algorithms"]["input_options"]``.
    file1 : path or file-like
        Input for ``X``. Passed to ``read_columns`` when the selected input
        type is ``"tabular"``, otherwise to ``mmread``.
    file2 : path or file-like
        Tab-separated input for ``y``; always passed to ``read_columns``.

    Returns
    -------
    tuple
        ``(X, y)`` where ``y`` is the result of calling ``.ravel()`` on the
        columns read from ``file2``.
    """
    # Selector modes for which an explicit column list has to be supplied.
    modes_with_columns = ["by_index_number", "all_but_by_index_number",
                          "by_header_name", "all_but_by_header_name"]
    input_options = params["selected_tasks"]["selected_algorithms"]["input_options"]

    if input_options["selected_input"] != "tabular":
        # Any non-tabular input is handed to mmread
        # (presumably a Matrix Market sparse file - verify against the tool XML).
        X = mmread(file1)
    else:
        header_x = 'infer' if input_options["header1"] else None
        selector_x = input_options["column_selector_options_1"]
        mode_x = selector_x["selected_column_selector_option"]
        columns_x = selector_x["col1"] if mode_x in modes_with_columns else None
        X = read_columns(file1, c=columns_x, c_option=mode_x,
                         sep='\t', header=header_x, parse_dates=True)

    # The target is always tabular, whatever the input type of X was.
    header_y = 'infer' if input_options["header2"] else None
    selector_y = input_options["column_selector_options_2"]
    mode_y = selector_y["selected_column_selector_option2"]
    columns_y = selector_y["col2"] if mode_y in modes_with_columns else None
    y = read_columns(file2, c=columns_y, c_option=mode_y,
                     sep='\t', header=header_y, parse_dates=True)

    return X, y.ravel()
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
166 |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
167 |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
class SafeEval(Interpreter):
    """Restricted interpreter for evaluating user-supplied expressions.

    The symbol table is built with ``make_symbol_table`` and extended with a
    small whitelist (``np_arange``, ``ensemble_ExtraTreesClassifier``).
    Statement-level constructs (if/for/while/try/def/assert/del/raise/print
    and conditional expressions) are disabled through the ``no_*`` flags
    passed to the base class.

    Parameters
    ----------
    load_scipy : bool
        When true, every ``rv_continuous`` / ``rv_discrete`` instance found in
        ``scipy.stats.distributions`` is exposed as ``scipy_stats_<name>``.
    load_numpy : bool
        When true, the listed ``numpy.random`` members are exposed as
        ``np_random_<name>``.
    """

    def __init__(self, load_scipy=False, load_numpy=False):

        # File opening and other unneeded functions could be dropped
        unwanted = ['open', 'type', 'dir', 'id', 'str', 'repr']

        # Allowed symbol table. Add more if needed.
        new_syms = {
            'np_arange': getattr(np, 'arange'),
            'ensemble_ExtraTreesClassifier': getattr(ensemble, 'ExtraTreesClassifier')
        }

        syms = make_symbol_table(use_numpy=False, **new_syms)

        if load_scipy:
            scipy_distributions = scipy.stats.distributions.__dict__
            for k, v in scipy_distributions.items():
                if isinstance(v, (scipy.stats.rv_continuous, scipy.stats.rv_discrete)):
                    syms['scipy_stats_' + k] = v

        if load_numpy:
            from_numpy_random = ['beta', 'binomial', 'bytes', 'chisquare', 'choice', 'dirichlet', 'division',
                                 'exponential', 'f', 'gamma', 'geometric', 'gumbel', 'hypergeometric',
                                 'laplace', 'logistic', 'lognormal', 'logseries', 'mtrand', 'multinomial',
                                 'multivariate_normal', 'negative_binomial', 'noncentral_chisquare', 'noncentral_f',
                                 'normal', 'pareto', 'permutation', 'poisson', 'power', 'rand', 'randint',
                                 'randn', 'random', 'random_integers', 'random_sample', 'ranf', 'rayleigh',
                                 'sample', 'seed', 'set_state', 'shuffle', 'standard_cauchy', 'standard_exponential',
                                 'standard_gamma', 'standard_normal', 'standard_t', 'triangular', 'uniform',
                                 'vonmises', 'wald', 'weibull', 'zipf']
            for f in from_numpy_random:
                # Some entries (e.g. 'division', 'random_integers') are not
                # present in every NumPy release; skip whatever the installed
                # version does not provide instead of failing with
                # AttributeError while building the interpreter.
                if hasattr(np.random, f):
                    syms['np_random_' + f] = getattr(np.random, f)

        # Remove the unwanted builtins last so nothing above can re-add them.
        for key in unwanted:
            syms.pop(key, None)

        super(SafeEval, self).__init__(symtable=syms, use_numpy=False, minimal=False,
                                       no_if=True, no_for=True, no_while=True, no_try=True,
                                       no_functiondef=True, no_ifexp=True, no_listcomp=False,
                                       no_augassign=False, no_assert=True, no_delete=True,
                                       no_raise=True, no_print=True)
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
210 |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
211 |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
def get_search_params(params_builder):
    """Build the search-parameter dict from the tool's parameter entries.

    Each entry of ``params_builder['param_set']`` carries a
    ``search_param_selector`` with a ``search_p`` string of the form
    ``"<name>: <expression>"`` and a ``selected_param_type``. Blank
    ``search_p`` entries are skipped. The expression is evaluated with a
    ``SafeEval`` interpreter that has the scipy and numpy symbols loaded.

    Returns
    -------
    dict
        Maps ``"estimator__<name>"`` (for type ``"final_estimator_p"``) or
        ``"preprocessing_<c>__<name>"`` (for any other type, where ``<c>`` is
        ``selected_param_type[5:6]``) to the evaluated expression.

    Raises
    ------
    AssertionError
        If a non-blank ``search_p`` does not contain exactly one colon.
    """
    search_params = {}
    safe_eval = SafeEval(load_scipy=True, load_numpy=True)

    for p in params_builder['param_set']:
        selector = p['search_param_selector']
        search_p = selector['search_p']
        if search_p.strip() == '':
            continue
        param_type = selector['selected_param_type']

        lst = search_p.split(":")
        if len(lst) != 2:
            # Raised explicitly rather than through an `assert` statement so
            # the check is not stripped under `python -O`; the exception type
            # and message are the ones the assert used to produce.
            raise AssertionError("Error, make sure there is one and only one colon in search parameter input.")

        name = lst[0].strip()
        ev = safe_eval(lst[1].strip())
        if param_type == "final_estimator_p":
            search_params["estimator__" + name] = ev
        else:
            search_params["preprocessing_" + param_type[5:6] + "__" + name] = ev

    return search_params
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
232 |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
233 |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
def get_estimator(estimator_json):
    """Instantiate and configure the estimator described by ``estimator_json``.

    Parameters
    ----------
    estimator_json : dict
        Must provide ``'selected_module'`` (``"xgboost"`` or the name of an
        attribute of ``sklearn``), ``'selected_estimator'`` (class name inside
        that module) and ``'text_params'`` (keyword arguments written as
        Python source, may be blank).

    Returns
    -------
    object
        The estimator instance, with the user parameters applied and
        ``n_jobs`` set to ``N_JOBS`` when the estimator exposes that parameter.

    Exits the process via ``sys.exit`` when ``text_params`` cannot be turned
    into a keyword dict.
    """
    estimator_module = estimator_json['selected_module']
    estimator_cls = estimator_json['selected_estimator']

    if estimator_module == "xgboost":
        cls = getattr(xgboost, estimator_cls)
    else:
        module = getattr(sklearn, estimator_module)
        cls = getattr(module, estimator_cls)

    estimator = cls()

    estimator_params = estimator_json['text_params'].strip()
    if estimator_params != "":
        # Build the interpreter locally, as get_cv and get_search_params do;
        # this function previously used `safe_eval` without binding it.
        safe_eval = SafeEval()
        try:
            params = safe_eval('dict(' + estimator_params + ')')
        except ValueError:
            sys.exit("Unsupported parameter input: `%s`" % estimator_params)
        # NOTE(review): asteval by default reports evaluation errors and
        # returns None instead of raising, so anything that is not a dict
        # here means the input could not be evaluated.
        if not isinstance(params, dict):
            sys.exit("Unsupported parameter input: `%s`" % estimator_params)
        estimator.set_params(**params)
    if 'n_jobs' in estimator.get_params():
        estimator.set_params(n_jobs=N_JOBS)

    return estimator
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
257 |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
258 |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
def get_cv(literal):
    """Translate the user's cross-validation text into a ``cv`` argument.

    Parameters
    ----------
    literal : str
        ``""``, a string of digits, or ``"<Name>(<keyword args>)"`` where
        ``<Name>`` is looked up as an attribute of ``model_selection``.

    Returns
    -------
    None
        For an empty string.
    int
        For a digit-only string.
    object
        ``getattr(model_selection, <Name>)(**kwargs)`` for the call form, with
        the keyword arguments evaluated through ``SafeEval``.

    Any other input terminates the process via ``sys.exit``.
    """
    if literal == "":
        return None
    if literal.isdigit():
        return int(literal)

    m = re.match(r'^(?P<method>\w+)\((?P<args>.*)\)$', literal)
    if m:
        my_class = getattr(model_selection, m.group('method'))
        # Only build the interpreter on the one path that needs it; the
        # early returns above do not evaluate anything.
        safe_eval = SafeEval()
        args = safe_eval('dict(' + m.group('args') + ')')
        # NOTE(review): asteval by default reports evaluation errors and
        # returns None instead of raising; treat a non-dict result as
        # unsupported input rather than failing on `**args`.
        if not isinstance(args, dict):
            sys.exit("Unsupported CV input: %s" % literal)
        return my_class(**args)
    sys.exit("Unsupported CV input: %s" % literal)
19
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
271 |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
272 |
6ba8d7af7e7a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
def get_scoring(scoring_json):
    """Translate the tool's scoring settings into a scorer selection.

    Parameters
    ----------
    scoring_json : dict
        Must contain ``'primary_scoring'`` (a scorer name, or the string
        ``"default"``).  May contain ``'secondary_scoring'``: a
        comma-separated string of scorer names, or the string ``'None'``.
        A missing, ``None`` or empty ``'secondary_scoring'`` is treated
        like ``'None'``.

    Returns
    -------
    None
        When ``primary_scoring`` is ``"default"``.
    scorer
        The registry entry for ``primary_scoring`` when no secondary
        scorers are requested (or the secondary value equals the primary).
    dict
        ``{'primary': <primary scorer>, <name>: <scorer>, ...}`` when
        secondary scorers are requested.  Secondary names equal to the
        primary name are skipped.

    Raises
    ------
    KeyError
        If ``'primary_scoring'`` is absent, or a requested scorer name is
        not present in the scorer registry.
    """
    def balanced_accuracy_score(y_true, y_pred):
        """Return the mean of the per-class diagonal/row-sum ratios of the
        confusion matrix, ignoring classes whose row sum is zero."""
        C = metrics.confusion_matrix(y_true, y_pred)
        with np.errstate(divide='ignore', invalid='ignore'):
            # true_divide: always float division, so a zero row sum gives
            # NaN (0/0) instead of a floored integer result.
            per_class = np.true_divide(np.diag(C), C.sum(axis=1))
        missing = np.isnan(per_class)
        if np.any(missing):
            warnings.warn('y_pred contains classes not in y_true')
            per_class = per_class[~missing]
        return np.mean(per_class)

    primary = scoring_json['primary_scoring']
    if primary == "default":
        return None

    # NOTE(review): `metrics` is presumably sklearn.metrics (import not
    # visible here).  SCORERS is used by reference, so the fallback below
    # is registered in the shared module-level dict, not in a copy.
    my_scorers = metrics.SCORERS
    if 'balanced_accuracy' not in my_scorers:
        my_scorers['balanced_accuracy'] = metrics.make_scorer(balanced_accuracy_score)

    secondary = scoring_json.get('secondary_scoring')
    if not secondary or secondary == 'None' or secondary == primary:
        return my_scorers[primary]

    scoring = {'primary': my_scorers[primary]}
    for name in secondary.split(','):
        # Tolerate "a, b" and trailing commas coming from the tool form.
        name = name.strip()
        if name and name != primary:
            scoring[name] = my_scorers[name]
    return scoring