Mercurial > repos > bgruening > scipy_sparse
annotate utils.py @ 25:648b81bca7c5 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 49522db5f2dc8a571af49e3f38e80c22571068f4
author | bgruening |
---|---|
date | Tue, 09 Jul 2019 19:41:01 -0400 |
parents | b9ed7b774ba3 |
children |
rev | line source |
---|---|
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
1 import ast |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
2 import json |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
3 import imblearn |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
4 import numpy as np |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
5 import pandas |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
6 import pickle |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
7 import re |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
8 import scipy |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
9 import sklearn |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
10 import skrebate |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
11 import sys |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
12 import warnings |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
13 import xgboost |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
14 |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
15 from collections import Counter |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
16 from asteval import Interpreter, make_symbol_table |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
17 from imblearn import under_sampling, over_sampling, combine |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
18 from imblearn.pipeline import Pipeline as imbPipeline |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
19 from mlxtend import regressor, classifier |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
20 from scipy.io import mmread |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
21 from sklearn import ( |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
22 cluster, compose, decomposition, ensemble, feature_extraction, |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
23 feature_selection, gaussian_process, kernel_approximation, metrics, |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
24 model_selection, naive_bayes, neighbors, pipeline, preprocessing, |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
25 svm, linear_model, tree, discriminant_analysis) |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
26 |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
27 try: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
28 import iraps_classifier |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
29 except ImportError: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
30 pass |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
31 |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
32 try: |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
33 import model_validations |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
34 except ImportError: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
35 pass |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
36 |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
37 try: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
38 import feature_selectors |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
39 except ImportError: |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
40 pass |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
41 |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
42 try: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
43 import preprocessors |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
44 except ImportError: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
45 pass |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
46 |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
# Path of the pickle whitelist JSON; it is looked up in the same directory
# as this module. `os` is reached through `__import__` rather than a
# top-level import, so no `os` name is bound in this module's namespace.
WL_FILE = __import__('os').path.join(
    __import__('os').path.dirname(__file__),
    'pk_whitelist.json',
)

# Number of jobs, read from the GALAXY_SLOTS environment variable;
# falls back to 1 when the variable is not set.
N_JOBS = int(
    __import__('os').environ.get('GALAXY_SLOTS', 1)
)
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
52 |
21
f9a2fe161db4
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
53 |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
class _SafePickler(pickle.Unpickler, object):
    """
    Used to safely deserialize scikit-learn model objects.

    Global lookups performed while unpickling are routed through
    `find_class`, which rejects blacklisted names, resolves Galaxy-ML
    custom modules only from already-imported modules, and resolves
    library globals only for a fixed set of module prefixes.

    Usage:
        eg.: _SafePickler(pickled_file_object).load()
    """
    # Sections of the whitelist JSON that are consulted for library globals.
    _WHITELIST_KEYS = (
        'SK_NAMES', 'SKR_NAMES', 'XGB_NAMES', 'NUMPY_NAMES',
        'IMBLEARN_NAMES', 'MLXTEND_NAMES')

    # Module prefixes whose globals may be resolved (plus bare 'numpy').
    _LIBRARY_PREFIXES = (
        'sklearn.', 'xgboost.', 'skrebate.',
        'imblearn.', 'mlxtend.', 'numpy.')

    def __init__(self, file):
        """Create an unpickler reading from `file` and load the whitelist.

        Parameters
        ----------
        file : binary file object
            Passed unchanged to `pickle.Unpickler.__init__`.
        """
        super(_SafePickler, self).__init__(file)
        # load global white list
        with open(WL_FILE, 'r') as f:
            self.pk_whitelist = json.load(f)

        # attribute / keyword names that are never resolved
        self.bad_names = (
            'and', 'as', 'assert', 'break', 'class', 'continue',
            'def', 'del', 'elif', 'else', 'except', 'exec',
            'finally', 'for', 'from', 'global', 'if', 'import',
            'in', 'is', 'lambda', 'not', 'or', 'pass', 'print',
            'raise', 'return', 'try', 'system', 'while', 'with',
            'True', 'False', 'None', 'eval', 'execfile', '__import__',
            '__package__', '__subclasses__', '__bases__', '__globals__',
            '__code__', '__closure__', '__func__', '__self__', '__module__',
            '__dict__', '__class__', '__call__', '__get__',
            '__getattribute__', '__subclasshook__', '__new__',
            '__init__', 'func_globals', 'func_code', 'func_closure',
            'im_class', 'im_func', 'im_self', 'gi_code', 'gi_frame',
            '__asteval__', 'f_locals', '__mro__')

        # unclassified good globals
        self.good_names = [
            'copy_reg._reconstructor', '__builtin__.object',
            '__builtin__.bytearray', 'builtins.object',
            'builtins.bytearray', 'keras.engine.sequential.Sequential',
            'keras.engine.sequential.Model']

        # custom module in Galaxy-ML
        self.custom_modules = [
            '__main__', 'keras_galaxy_models', 'feature_selectors',
            'preprocessors', 'iraps_classifier', 'model_validations']

    # override
    def find_class(self, module, name):
        """Resolve the global `module.name` requested by the pickle stream.

        Parameters
        ----------
        module : str
            Module name recorded in the pickle.
        name : str
            Attribute name recorded in the pickle.

        Returns
        -------
        The attribute `name` of the already-imported module `module`.

        Raises
        ------
        pickle.UnpicklingError
            If `name` is blacklisted, if a custom module is not imported,
            or if the global is not from a permitted module.
        KeyError
            If a permitted library module is not present in `sys.modules`
            (modules are never imported on behalf of the pickle).
        """
        # Computed up front so that every rejection path below can report
        # it; previously it was only bound inside the identifier check and
        # the final raise failed with UnboundLocalError for other names.
        fullname = module + '.' + name

        # blacklist first
        if name in self.bad_names:
            raise pickle.UnpicklingError("global '%s.%s' is forbidden"
                                         % (module, name))

        # custom module in Galaxy-ML
        if module in self.custom_modules:
            custom_module = sys.modules.get(module, None)
            if custom_module:
                return getattr(custom_module, name)
            raise pickle.UnpicklingError("Module %s' is not imported"
                                         % module)

        # For objects from outside libraries, it's necessary to verify
        # both module and name. Currently only a blacklist checker
        # is working.
        # TODO: replace with a whitelist checker.
        good_names = self.good_names
        if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
            if (fullname in good_names
                    or module.startswith(self._LIBRARY_PREFIXES)
                    or module == 'numpy'):
                pk_whitelist = self.pk_whitelist
                whitelisted = fullname in good_names or any(
                    fullname in pk_whitelist[key]
                    for key in self._WHITELIST_KEYS)
                if not whitelisted:
                    # Not enforced yet: only warn, then resolve anyway.
                    # raise pickle.UnpicklingError
                    print("Warning: global %s is not in pickler whitelist "
                          "yet and will loss support soon. Contact tool "
                          "author or leave a message at github.com" % fullname)
                mod = sys.modules[module]
                return getattr(mod, name)

        raise pickle.UnpicklingError("global '%s' is forbidden" % fullname)
60945fb5d650
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8cf3d813ec755166ee0bd517b4ecbbd4f84d4df1
bgruening
parents:
19
diff
changeset
|
136 |
21
f9a2fe161db4
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
137 |
f9a2fe161db4
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
def load_model(file):
    """Load a pickled object with `_SafePickler`.

    Parameters
    ----------
    file : file-like object
        Open handle on the pickle stream; it is handed directly to
        `_SafePickler`. Callers in this module open it in 'rb' mode.

    Returns
    -------
    object
        Whatever `_SafePickler(file).load()` returns.
    """
    unpickler = _SafePickler(file)
    return unpickler.load()
21
f9a2fe161db4
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
142 |
20
60945fb5d650
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8cf3d813ec755166ee0bd517b4ecbbd4f84d4df1
bgruening
parents:
19
diff
changeset
|
143 |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
def read_columns(f, c=None, c_option='by_index_number',
                 return_df=False, **args):
    """Return array from a tabular dataset by various column selections.

    Parameters
    ----------
    f : str or file-like object
        Source handed unchanged to `pandas.read_csv`.
    c : iterable of int, or str, optional
        Column selection. For the ``*_index_number`` options, an iterable
        of 1-based column positions. For the ``*_header_name`` options, a
        comma-separated string of header names (whitespace around each
        name is stripped).
    c_option : str
        One of 'by_index_number', 'all_but_by_index_number',
        'by_header_name' or 'all_but_by_header_name'. Any other value
        keeps every column.
    return_df : bool
        When true, also return the selected `pandas.DataFrame`.
    **args
        Extra keyword arguments forwarded to `pandas.read_csv`.

    Returns
    -------
    numpy.ndarray, or (numpy.ndarray, pandas.DataFrame) if `return_df`
        The values of the selected columns.

    Raises
    ------
    ValueError
        If `c_option` requests a column selection but `c` is None.
    """
    index_options = ('by_index_number', 'all_but_by_index_number')
    header_options = ('by_header_name', 'all_but_by_header_name')

    # Fail early with a readable message instead of the opaque
    # TypeError/AttributeError that iterating or splitting None produces.
    if c is None and c_option in index_options + header_options:
        raise ValueError(
            "read_columns: `c` must be provided when c_option is '%s'"
            % c_option)

    data = pandas.read_csv(f, **args)

    if c_option in index_options:
        # Positions arrive 1-based; pandas positional indexing is 0-based.
        positions = [idx - 1 for idx in c]
        if c_option == 'by_index_number':
            data = data.iloc[:, positions]
        else:
            data = data.drop(data.columns[positions], axis=1)
    elif c_option in header_options:
        names = [name.strip() for name in c.split(',')]
        if c_option == 'by_header_name':
            data = data[names]
        else:
            data = data.drop(names, axis=1)
    # Any other c_option: keep all columns.

    y = data.values
    if return_df:
        return y, data
    return y
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
166 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
167 |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
def feature_selector(inputs, X=None, y=None):
    """Instantiate a feature selector described by Galaxy tool parameters.

    Parameters
    ----------
    inputs : dict
        From galaxy tool parameters. 'selected_algorithm' and 'options'
        are always read; depending on the algorithm, 'model_inputter',
        'estimator_selector' or 'score_func' are read as well.
        NOTE: `inputs['options']` is modified in place.
    X : array
        Containing training features. Only used to materialise the CV
        splits when the configured splitter comes with groups.
    y : array or list
        Target values. Used together with `X` for the same purpose.

    Returns
    -------
    The constructed selector instance.
    """
    algorithm = inputs['selected_algorithm']
    # DyRFECV is resolved later through `try_get_attr`; every other
    # algorithm name is looked up on `sklearn.feature_selection`.
    if algorithm != 'DyRFECV':
        selector = getattr(sklearn.feature_selection, algorithm)
    else:
        selector = algorithm
    options = inputs['options']

    def _checked_estimator(spec):
        # Build the estimator, then pass it through the project's
        # `check_feature_importances` hook.
        est = get_estimator(spec)
        checker = try_get_attr(
            'feature_selectors', 'check_feature_importances')
        return checker(est)

    def _configure_cv():
        # Shared by RFECV and DyRFECV: scoring, parallelism and CV splits.
        options['scoring'] = get_scoring(options['scoring'])
        options['n_jobs'] = N_JOBS
        splitter, groups = get_cv(options.pop('cv_selector'))
        if groups is None:
            options['cv'] = splitter
        else:
            options['cv'] = list(splitter.split(X, y, groups=groups))

    def _integer_step():
        # A truthy step of at least 1 is truncated to int; anything else
        # is left untouched.
        step = options.get('step', None)
        if step and step >= 1.0:
            options['step'] = int(step)

    if algorithm == 'SelectFromModel':
        threshold = options['threshold']
        if not threshold or threshold == 'None':
            options['threshold'] = None
        else:
            try:
                options['threshold'] = float(threshold)
            except ValueError:
                # Values that do not parse as a float are passed through
                # unchanged.
                pass

        model_inputter = inputs['model_inputter']
        if model_inputter['input_mode'] == 'prefitted':
            model_file = model_inputter['fitted_estimator']
            with open(model_file, 'rb') as model_handler:
                fitted_estimator = load_model(model_handler)
            return selector(fitted_estimator, prefit=True, **options)

        estimator = _checked_estimator(model_inputter['estimator_selector'])
        return selector(estimator, **options)

    if algorithm == 'RFE':
        _integer_step()
        estimator = _checked_estimator(inputs["estimator_selector"])
        return selector(estimator, **options)

    if algorithm == 'RFECV':
        _configure_cv()
        _integer_step()
        estimator = _checked_estimator(inputs['estimator_selector'])
        return selector(estimator, **options)

    if algorithm == 'DyRFECV':
        _configure_cv()
        raw_step = options.get('step')
        if not raw_step or raw_step == 'None':
            options['step'] = None
        else:
            options['step'] = ast.literal_eval(raw_step)
        estimator = _checked_estimator(inputs["estimator_selector"])
        dy_rfecv_cls = try_get_attr('feature_selectors', 'DyRFECV')
        return dy_rfecv_cls(estimator, **options)

    if algorithm == 'VarianceThreshold':
        return selector(**options)

    # Remaining algorithms take a scoring function as their first argument.
    score_func = getattr(sklearn.feature_selection, inputs['score_func'])
    return selector(score_func, **options)
21
f9a2fe161db4
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
264 |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
265 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
266 def get_X_y(params, file1, file2): |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
267 """Return machine learning inputs X, y from tabluar inputs |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
268 """ |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
269 input_type = (params['selected_tasks']['selected_algorithms'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
270 ['input_options']['selected_input']) |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
271 if input_type == 'tabular': |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
272 header = 'infer' if (params['selected_tasks']['selected_algorithms'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
273 ['input_options']['header1']) else None |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
274 column_option = (params['selected_tasks']['selected_algorithms'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
275 ['input_options']['column_selector_options_1'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
276 ['selected_column_selector_option']) |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
277 if column_option in ['by_index_number', 'all_but_by_index_number', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
278 'by_header_name', 'all_but_by_header_name']: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
279 c = (params['selected_tasks']['selected_algorithms'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
280 ['input_options']['column_selector_options_1']['col1']) |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
281 else: |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
282 c = None |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
283 X = read_columns( |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
284 file1, |
21
f9a2fe161db4
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
285 c=c, |
f9a2fe161db4
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
286 c_option=column_option, |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
287 sep='\t', |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
288 header=header, |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
289 parse_dates=True).astype(float) |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
290 else: |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
291 X = mmread(file1) |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
292 |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
293 header = 'infer' if (params['selected_tasks']['selected_algorithms'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
294 ['input_options']['header2']) else None |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
295 column_option = (params['selected_tasks']['selected_algorithms'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
296 ['input_options']['column_selector_options_2'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
297 ['selected_column_selector_option2']) |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
298 if column_option in ['by_index_number', 'all_but_by_index_number', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
299 'by_header_name', 'all_but_by_header_name']: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
300 c = (params['selected_tasks']['selected_algorithms'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
301 ['input_options']['column_selector_options_2']['col2']) |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
302 else: |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
303 c = None |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
304 y = read_columns( |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
305 file2, |
21
f9a2fe161db4
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
306 c=c, |
f9a2fe161db4
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
307 c_option=column_option, |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
308 sep='\t', |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
309 header=header, |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
310 parse_dates=True) |
21
f9a2fe161db4
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
311 y = y.ravel() |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
312 |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
313 return X, y |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
314 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
315 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
316 class SafeEval(Interpreter): |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
317 """Customized symbol table for safe literal eval |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
318 """ |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
319 def __init__(self, load_scipy=False, load_numpy=False, |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
320 load_estimators=False): |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
321 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
322 # File opening and other unneeded functions could be dropped |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
323 unwanted = ['open', 'type', 'dir', 'id', 'str', 'repr'] |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
324 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
325 # Allowed symbol table. Add more if needed. |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
326 new_syms = { |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
327 'np_arange': getattr(np, 'arange'), |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
328 'ensemble_ExtraTreesClassifier': |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
329 getattr(ensemble, 'ExtraTreesClassifier') |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
330 } |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
331 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
332 syms = make_symbol_table(use_numpy=False, **new_syms) |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
333 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
334 if load_scipy: |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
335 scipy_distributions = scipy.stats.distributions.__dict__ |
20
60945fb5d650
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8cf3d813ec755166ee0bd517b4ecbbd4f84d4df1
bgruening
parents:
19
diff
changeset
|
336 for k, v in scipy_distributions.items(): |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
337 if isinstance(v, (scipy.stats.rv_continuous, |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
338 scipy.stats.rv_discrete)): |
20
60945fb5d650
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 8cf3d813ec755166ee0bd517b4ecbbd4f84d4df1
bgruening
parents:
19
diff
changeset
|
339 syms['scipy_stats_' + k] = v |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
340 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
341 if load_numpy: |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
342 from_numpy_random = [ |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
343 'beta', 'binomial', 'bytes', 'chisquare', 'choice', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
344 'dirichlet', 'division', 'exponential', 'f', 'gamma', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
345 'geometric', 'gumbel', 'hypergeometric', 'laplace', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
346 'logistic', 'lognormal', 'logseries', 'mtrand', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
347 'multinomial', 'multivariate_normal', 'negative_binomial', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
348 'noncentral_chisquare', 'noncentral_f', 'normal', 'pareto', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
349 'permutation', 'poisson', 'power', 'rand', 'randint', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
350 'randn', 'random', 'random_integers', 'random_sample', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
351 'ranf', 'rayleigh', 'sample', 'seed', 'set_state', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
352 'shuffle', 'standard_cauchy', 'standard_exponential', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
353 'standard_gamma', 'standard_normal', 'standard_t', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
354 'triangular', 'uniform', 'vonmises', 'wald', 'weibull', 'zipf'] |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
355 for f in from_numpy_random: |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
356 syms['np_random_' + f] = getattr(np.random, f) |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
357 |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
358 if load_estimators: |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
359 estimator_table = { |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
360 'sklearn_svm': getattr(sklearn, 'svm'), |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
361 'sklearn_tree': getattr(sklearn, 'tree'), |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
362 'sklearn_ensemble': getattr(sklearn, 'ensemble'), |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
363 'sklearn_neighbors': getattr(sklearn, 'neighbors'), |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
364 'sklearn_naive_bayes': getattr(sklearn, 'naive_bayes'), |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
365 'sklearn_linear_model': getattr(sklearn, 'linear_model'), |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
366 'sklearn_cluster': getattr(sklearn, 'cluster'), |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
367 'sklearn_decomposition': getattr(sklearn, 'decomposition'), |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
368 'sklearn_preprocessing': getattr(sklearn, 'preprocessing'), |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
369 'sklearn_feature_selection': |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
370 getattr(sklearn, 'feature_selection'), |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
371 'sklearn_kernel_approximation': |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
372 getattr(sklearn, 'kernel_approximation'), |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
373 'skrebate_ReliefF': getattr(skrebate, 'ReliefF'), |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
374 'skrebate_SURF': getattr(skrebate, 'SURF'), |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
375 'skrebate_SURFstar': getattr(skrebate, 'SURFstar'), |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
376 'skrebate_MultiSURF': getattr(skrebate, 'MultiSURF'), |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
377 'skrebate_MultiSURFstar': getattr(skrebate, 'MultiSURFstar'), |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
378 'skrebate_TuRF': getattr(skrebate, 'TuRF'), |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
379 'xgboost_XGBClassifier': getattr(xgboost, 'XGBClassifier'), |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
380 'xgboost_XGBRegressor': getattr(xgboost, 'XGBRegressor'), |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
381 'imblearn_over_sampling': getattr(imblearn, 'over_sampling'), |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
382 'imblearn_combine': getattr(imblearn, 'combine') |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
383 } |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
384 syms.update(estimator_table) |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
385 |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
386 for key in unwanted: |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
387 syms.pop(key, None) |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
388 |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
389 super(SafeEval, self).__init__( |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
390 symtable=syms, use_numpy=False, minimal=False, |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
391 no_if=True, no_for=True, no_while=True, no_try=True, |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
392 no_functiondef=True, no_ifexp=True, no_listcomp=False, |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
393 no_augassign=False, no_assert=True, no_delete=True, |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
394 no_raise=True, no_print=True) |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
395 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
396 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
397 def get_estimator(estimator_json): |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
398 """Return a sklearn or compatible estimator from Galaxy tool inputs |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
399 """ |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
400 estimator_module = estimator_json['selected_module'] |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
401 |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
402 if estimator_module == 'custom_estimator': |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
403 c_estimator = estimator_json['c_estimator'] |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
404 with open(c_estimator, 'rb') as model_handler: |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
405 new_model = load_model(model_handler) |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
406 return new_model |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
407 |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
408 if estimator_module == "binarize_target": |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
409 wrapped_estimator = estimator_json['wrapped_estimator'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
410 with open(wrapped_estimator, 'rb') as model_handler: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
411 wrapped_estimator = load_model(model_handler) |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
412 options = {} |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
413 if estimator_json['z_score'] is not None: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
414 options['z_score'] = estimator_json['z_score'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
415 if estimator_json['value'] is not None: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
416 options['value'] = estimator_json['value'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
417 options['less_is_positive'] = estimator_json['less_is_positive'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
418 if estimator_json['clf_or_regr'] == 'BinarizeTargetClassifier': |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
419 klass = try_get_attr('iraps_classifier', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
420 'BinarizeTargetClassifier') |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
421 else: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
422 klass = try_get_attr('iraps_classifier', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
423 'BinarizeTargetRegressor') |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
424 return klass(wrapped_estimator, **options) |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
425 |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
426 estimator_cls = estimator_json['selected_estimator'] |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
427 |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
428 if estimator_module == 'xgboost': |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
429 klass = getattr(xgboost, estimator_cls) |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
430 else: |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
431 module = getattr(sklearn, estimator_module) |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
432 klass = getattr(module, estimator_cls) |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
433 |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
434 estimator = klass() |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
435 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
436 estimator_params = estimator_json['text_params'].strip() |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
437 if estimator_params != '': |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
438 try: |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
439 safe_eval = SafeEval() |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
440 params = safe_eval('dict(' + estimator_params + ')') |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
441 except ValueError: |
21
f9a2fe161db4
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
442 sys.exit("Unsupported parameter input: `%s`" % estimator_params) |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
443 estimator.set_params(**params) |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
444 if 'n_jobs' in estimator.get_params(): |
21
f9a2fe161db4
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
445 estimator.set_params(n_jobs=N_JOBS) |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
446 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
447 return estimator |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
448 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
449 |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
450 def get_cv(cv_json): |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
451 """ Return CV splitter from Galaxy tool inputs |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
452 |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
453 Parameters |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
454 ---------- |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
455 cv_json : dict |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
456 From Galaxy tool inputs. |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
457 e.g.: |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
458 { |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
459 'selected_cv': 'StratifiedKFold', |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
460 'n_splits': 3, |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
461 'shuffle': True, |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
462 'random_state': 0 |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
463 } |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
464 """ |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
465 cv = cv_json.pop('selected_cv') |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
466 if cv == 'default': |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
467 return cv_json['n_splits'], None |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
468 |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
469 groups = cv_json.pop('groups_selector', None) |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
470 if groups is not None: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
471 infile_g = groups['infile_g'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
472 header = 'infer' if groups['header_g'] else None |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
473 column_option = (groups['column_selector_options_g'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
474 ['selected_column_selector_option_g']) |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
475 if column_option in ['by_index_number', 'all_but_by_index_number', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
476 'by_header_name', 'all_but_by_header_name']: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
477 c = groups['column_selector_options_g']['col_g'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
478 else: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
479 c = None |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
480 groups = read_columns( |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
481 infile_g, |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
482 c=c, |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
483 c_option=column_option, |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
484 sep='\t', |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
485 header=header, |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
486 parse_dates=True) |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
487 groups = groups.ravel() |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
488 |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
489 for k, v in cv_json.items(): |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
490 if v == '': |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
491 cv_json[k] = None |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
492 |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
493 test_fold = cv_json.get('test_fold', None) |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
494 if test_fold: |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
495 if test_fold.startswith('__ob__'): |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
496 test_fold = test_fold[6:] |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
497 if test_fold.endswith('__cb__'): |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
498 test_fold = test_fold[:-6] |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
499 cv_json['test_fold'] = [int(x.strip()) for x in test_fold.split(',')] |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
500 |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
501 test_size = cv_json.get('test_size', None) |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
502 if test_size and test_size > 1.0: |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
503 cv_json['test_size'] = int(test_size) |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
504 |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
505 if cv == 'OrderedKFold': |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
506 cv_class = try_get_attr('model_validations', 'OrderedKFold') |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
507 elif cv == 'RepeatedOrderedKFold': |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
508 cv_class = try_get_attr('model_validations', 'RepeatedOrderedKFold') |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
509 else: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
510 cv_class = getattr(model_selection, cv) |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
511 splitter = cv_class(**cv_json) |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
512 |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
513 return splitter, groups |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
514 |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
515 |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
516 # needed when sklearn < v0.20 |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
517 def balanced_accuracy_score(y_true, y_pred): |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
518 """Compute balanced accuracy score, which is now available in |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
519 scikit-learn from v0.20.0. |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
520 """ |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
521 C = metrics.confusion_matrix(y_true, y_pred) |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
522 with np.errstate(divide='ignore', invalid='ignore'): |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
523 per_class = np.diag(C) / C.sum(axis=1) |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
524 if np.any(np.isnan(per_class)): |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
525 warnings.warn('y_pred contains classes not in y_true') |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
526 per_class = per_class[~np.isnan(per_class)] |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
527 score = np.mean(per_class) |
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
528 return score |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
529 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
530 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
531 def get_scoring(scoring_json): |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
532 """Return single sklearn scorer class |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
533 or multiple scorers in dictionary |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
534 """ |
23
27c0b1a050df
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
bgruening
parents:
21
diff
changeset
|
535 if scoring_json['primary_scoring'] == 'default': |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
536 return None |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
537 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
538 my_scorers = metrics.SCORERS |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
539 my_scorers['binarize_auc_scorer'] =\ |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
540 try_get_attr('iraps_classifier', 'binarize_auc_scorer') |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
541 my_scorers['binarize_average_precision_scorer'] =\ |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
542 try_get_attr('iraps_classifier', 'binarize_average_precision_scorer') |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
543 if 'balanced_accuracy' not in my_scorers: |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
544 my_scorers['balanced_accuracy'] =\ |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
545 metrics.make_scorer(balanced_accuracy_score) |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
546 |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
547 if scoring_json['secondary_scoring'] != 'None'\ |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
548 and scoring_json['secondary_scoring'] !=\ |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
549 scoring_json['primary_scoring']: |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
550 return_scoring = {} |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
551 primary_scoring = scoring_json['primary_scoring'] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
552 return_scoring[primary_scoring] = my_scorers[primary_scoring] |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
553 for scorer in scoring_json['secondary_scoring'].split(','): |
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
554 if scorer != scoring_json['primary_scoring']: |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
555 return_scoring[scorer] = my_scorers[scorer] |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
556 return return_scoring |
19
c92a4d1252e1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
bgruening
parents:
diff
changeset
|
557 |
21
f9a2fe161db4
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
bgruening
parents:
20
diff
changeset
|
558 return my_scorers[scoring_json['primary_scoring']] |
24
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
559 |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
560 |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
def get_search_params(estimator):
    """List an estimator's parameters, marking which ones are searchable.

    Parameters
    ----------
    estimator : object
        Object exposing `get_params()` which returns a dict keyed by
        parameter name.

    Returns
    -------
    list of list
        One `[marker, name, "name: <repr of value>"]` row per parameter,
        in the order yielded by `get_params()`. The marker is '*' when
        the name ends with one of the excluded suffixes and '@'
        otherwise. A final note row explaining the '@' marker is
        always appended.
    """
    # Names ending with any of these suffixes won't be shown for search
    # in the searchcv tool.
    excluded_suffixes = ('n_jobs', 'pre_dispatch', 'memory', 'steps',
                         'nthread', 'verbose')

    rows = [
        ['*' if name.endswith(excluded_suffixes) else '@',
         name,
         ": ".join([name, repr(value)])]
        for name, value in estimator.get_params().items()
    ]
    rows.append(
        ["", "Note:", "@, params eligible for search in searchcv tool."])
    return rows
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
579 |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
580 |
b9ed7b774ba3
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
bgruening
parents:
23
diff
changeset
|
def try_get_attr(module, name):
    """Get an attribute from a custom module that is already imported.

    The module is looked up in `sys.modules` only; this function does not
    import anything itself.

    Parameters
    ----------
    module : str
        Module name
    name : str
        Attribute (class/function) name.

    Returns
    -------
    class or function

    Raises
    ------
    ImportError
        If `module` is not present in `sys.modules`. `ImportError` is a
        subclass of `Exception`, so callers catching `Exception` are
        unaffected.
    AttributeError
        If the module has no attribute called `name`.
    """
    mod = sys.modules.get(module)
    if mod is None:
        raise ImportError("No module named %s." % module)
    return getattr(mod, name)