annotate train_test_eval.py @ 35:61edd9e5c17f draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 13:10:57 +0000
parents 5773e98921fc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
1 import argparse
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
2 import json
25
41b109e70a7f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5b2ac730ec6d3b762faa9034eddd19ad1b347476"
bgruening
parents: 21
diff changeset
3 import os
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
4 import warnings
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
5 from itertools import chain
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
6
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
7 import joblib
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
8 import numpy as np
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
9 import pandas as pd
35
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
10 from galaxy_ml.model_persist import dump_model_to_h5, load_model_from_h5
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
11 from galaxy_ml.model_validations import train_test_split
35
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
12 from galaxy_ml.utils import (
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
13 clean_params,
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
14 get_module,
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
15 get_scoring,
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
16 read_columns,
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
17 SafeEval,
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
18 try_get_attr
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
19 )
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
20 from scipy.io import mmread
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
21 from sklearn import pipeline
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
22 from sklearn.model_selection import _search, _validation
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
23 from sklearn.model_selection._validation import _score
35
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
24 from sklearn.utils import _safe_indexing, indexable
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
25
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
# Point scikit-learn's search and validation modules at the
# `_fit_and_score` implementation looked up from galaxy_ml.
_fit_and_score = try_get_attr("galaxy_ml.model_validations", "_fit_and_score")
_search._fit_and_score = _fit_and_score
_validation._fit_and_score = _fit_and_score

# Number of parallel jobs, read from the GALAXY_SLOTS environment variable
# (1 when the variable is not set).
N_JOBS = int(os.environ.get("GALAXY_SLOTS", 1))
# Cache directory located under the current working directory.
CACHE_DIR = os.path.join(os.getcwd(), "cached")
# Remove `os` from the module namespace once the constants are computed.
del os

# Parameter-name suffixes that `_eval_swap_params` skips with a warning.
NON_SEARCHABLE = (
    "n_jobs",
    "pre_dispatch",
    "memory",
    "_path",
    "nthread",
    "callbacks",
)
# Callback names accepted by this tool ("None" is the literal string).
ALLOWED_CALLBACKS = (
    "EarlyStopping",
    "TerminateOnNaN",
    "ReduceLROnPlateau",
    "CSVLogger",
    "None",
)
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
41
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
42
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
43 def _eval_swap_params(params_builder):
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
44 swap_params = {}
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
45
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
46 for p in params_builder["param_set"]:
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
47 swap_value = p["sp_value"].strip()
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
48 if swap_value == "":
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
49 continue
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
50
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
51 param_name = p["sp_name"]
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
52 if param_name.lower().endswith(NON_SEARCHABLE):
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
53 warnings.warn(
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
54 "Warning: `%s` is not eligible for search and was "
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
55 "omitted!" % param_name
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
56 )
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
57 continue
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
58
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
59 if not swap_value.startswith(":"):
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
60 safe_eval = SafeEval(load_scipy=True, load_numpy=True)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
61 ev = safe_eval(swap_value)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
62 else:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
63 # Have `:` before search list, asks for estimator evaluatio
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
64 safe_eval_es = SafeEval(load_estimators=True)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
65 swap_value = swap_value[1:].strip()
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
66 # TODO maybe add regular express check
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
67 ev = safe_eval_es(swap_value)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
68
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
69 swap_params[param_name] = ev
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
70
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
71 return swap_params
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
72
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
73
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
def train_test_split_none(*arrays, **kwargs):
    """Split arrays into train/test parts, tolerating ``None`` entries.

    Extends ``train_test_split`` in two ways: positional arrays that are
    ``None`` are passed through as ``None, None`` pairs, and the test set
    can be chosen by group name instead of by a splitter.

    Parameters
    ----------
    *arrays
        Arrays to split; any of them may be ``None``.
    **kwargs
        shuffle : optional
            The literal string ``"None"`` is converted to ``None``.
        group_names : str, optional
            Comma-separated group names. When non-blank, samples whose
            entry in ``labels`` matches one of the names form the test
            set and all remaining samples form the train set; the other
            keyword arguments are not used for that split.
        labels
            Required when ``group_names`` is non-blank; compared against
            the group names with ``np.isin``.
        Remaining keywords are forwarded to ``train_test_split`` when no
        group names are given.

    Returns
    -------
    list
        ``[train_0, test_0, train_1, test_1, ...]`` in the order of
        ``arrays``, with ``None, None`` at the slots of ``None`` inputs.

    Raises
    ------
    KeyError
        If ``group_names`` is non-blank and ``labels`` is missing.
    """
    none_positions = [idx for idx, arr in enumerate(arrays) if arr is None]
    new_arrays = [arr for arr in arrays if arr is not None]

    # `.get` so that callers who omit `shuffle` do not hit a KeyError.
    if kwargs.get("shuffle") == "None":
        kwargs["shuffle"] = None

    group_names = kwargs.pop("group_names", None)

    if group_names is not None and group_names.strip():
        group_names = [name.strip() for name in group_names.split(",")]
        new_arrays = indexable(*new_arrays)
        groups = kwargs["labels"]
        n_samples = new_arrays[0].shape[0]
        index_arr = np.arange(n_samples)
        in_test = np.isin(groups, group_names)
        test = index_arr[in_test]
        train = index_arr[~in_test]
        rval = list(
            chain.from_iterable(
                (_safe_indexing(a, train), _safe_indexing(a, test))
                for a in new_arrays
            )
        )
    else:
        # Copy into a list so the slice insertion below is always valid.
        rval = list(train_test_split(*new_arrays, **kwargs))

    # Re-insert a (None, None) pair at the slot of every None input.
    # Positions ascend, so earlier slots are filled before later ones.
    # The empty slice [i:i] inserts; the previous `[pos * 2: 2]` replaced
    # the first real train/test pair when pos was 0.
    for pos in none_positions:
        insert_at = pos * 2
        rval[insert_at:insert_at] = [None, None]

    return rval
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
111
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
112
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
113 def main(
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
114 inputs,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
115 infile_estimator,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
116 infile1,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
117 infile2,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
118 outfile_result,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
119 outfile_object=None,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
120 outfile_weights=None,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
121 groups=None,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
122 ref_seq=None,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
123 intervals=None,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
124 targets=None,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
125 fasta_path=None,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
126 ):
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
127 """
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
128 Parameter
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
129 ---------
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
130 inputs : str
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
131 File path to galaxy tool parameter
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
132
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
133 infile_estimator : str
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
134 File path to estimator
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
135
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
136 infile1 : str
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
137 File path to dataset containing features
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
138
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
139 infile2 : str
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
140 File path to dataset containing target values
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
141
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
142 outfile_result : str
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
143 File path to save the results, either cv_results or test result
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
144
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
145 outfile_object : str, optional
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
146 File path to save searchCV object
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
147
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
148 outfile_weights : str, optional
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
149 File path to save deep learning model weights
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
150
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
151 groups : str
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
152 File path to dataset containing group labels
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
153
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
154 ref_seq : str
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
155 File path to dataset containing genome sequence file
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
156
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
157 intervals : str
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
158 File path to dataset containing interval file
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
159
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
160 targets : str
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
161 File path to dataset containing compressed target bed file
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
162
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
163 fasta_path : str
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
164 File path to dataset containing fasta file
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
165 """
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
166 warnings.simplefilter("ignore")
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
167
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
168 with open(inputs, "r") as param_handler:
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
169 params = json.load(param_handler)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
170
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
171 # load estimator
35
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
172 estimator = load_model_from_h5(infile_estimator)
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
173 estimator = clean_params(estimator)
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
174
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
175 # swap hyperparameter
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
176 swapping = params["experiment_schemes"]["hyperparams_swapping"]
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
177 swap_params = _eval_swap_params(swapping)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
178 estimator.set_params(**swap_params)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
179
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
180 estimator_params = estimator.get_params()
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
181
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
182 # store read dataframe object
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
183 loaded_df = {}
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
184
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
185 input_type = params["input_options"]["selected_input"]
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
186 # tabular input
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
187 if input_type == "tabular":
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
188 header = "infer" if params["input_options"]["header1"] else None
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
189 column_option = params["input_options"]["column_selector_options_1"][
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
190 "selected_column_selector_option"
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
191 ]
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
192 if column_option in [
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
193 "by_index_number",
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
194 "all_but_by_index_number",
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
195 "by_header_name",
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
196 "all_but_by_header_name",
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
197 ]:
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
198 c = params["input_options"]["column_selector_options_1"]["col1"]
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
199 else:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
200 c = None
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
201
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
202 df_key = infile1 + repr(header)
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
203 df = pd.read_csv(infile1, sep="\t", header=header, parse_dates=True)
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
204 loaded_df[df_key] = df
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
205
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
206 X = read_columns(df, c=c, c_option=column_option).astype(float)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
207 # sparse input
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
208 elif input_type == "sparse":
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
209 X = mmread(open(infile1, "r"))
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
210
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
211 # fasta_file input
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
212 elif input_type == "seq_fasta":
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
213 pyfaidx = get_module("pyfaidx")
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
214 sequences = pyfaidx.Fasta(fasta_path)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
215 n_seqs = len(sequences.keys())
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
216 X = np.arange(n_seqs)[:, np.newaxis]
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
217 for param in estimator_params.keys():
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
218 if param.endswith("fasta_path"):
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
219 estimator.set_params(**{param: fasta_path})
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
220 break
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
221 else:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
222 raise ValueError(
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
223 "The selected estimator doesn't support "
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
224 "fasta file input! Please consider using "
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
225 "KerasGBatchClassifier with "
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
226 "FastaDNABatchGenerator/FastaProteinBatchGenerator "
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
227 "or having GenomeOneHotEncoder/ProteinOneHotEncoder "
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
228 "in pipeline!"
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
229 )
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
230
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
231 elif input_type == "refseq_and_interval":
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
232 path_params = {
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
233 "data_batch_generator__ref_genome_path": ref_seq,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
234 "data_batch_generator__intervals_path": intervals,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
235 "data_batch_generator__target_path": targets,
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
236 }
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
237 estimator.set_params(**path_params)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
238 n_intervals = sum(1 for line in open(intervals))
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
239 X = np.arange(n_intervals)[:, np.newaxis]
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
240
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
241 # Get target y
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
242 header = "infer" if params["input_options"]["header2"] else None
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
243 column_option = params["input_options"]["column_selector_options_2"][
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
244 "selected_column_selector_option2"
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
245 ]
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
246 if column_option in [
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
247 "by_index_number",
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
248 "all_but_by_index_number",
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
249 "by_header_name",
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
250 "all_but_by_header_name",
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
251 ]:
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
252 c = params["input_options"]["column_selector_options_2"]["col2"]
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
253 else:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
254 c = None
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
255
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
256 df_key = infile2 + repr(header)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
257 if df_key in loaded_df:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
258 infile2 = loaded_df[df_key]
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
259 else:
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
260 infile2 = pd.read_csv(infile2, sep="\t", header=header, parse_dates=True)
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
261 loaded_df[df_key] = infile2
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
262
31
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
263 y = read_columns(
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
264 infile2, c=c, c_option=column_option, sep="\t", header=header, parse_dates=True
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
265 )
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
266 if len(y.shape) == 2 and y.shape[1] == 1:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
267 y = y.ravel()
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
268 if input_type == "refseq_and_interval":
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
269 estimator.set_params(data_batch_generator__features=y.ravel().tolist())
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
270 y = None
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
271 # end y
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
272
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
273 # load groups
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
274 if groups:
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
275 groups_selector = (
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
276 params["experiment_schemes"]["test_split"]["split_algos"]
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
277 ).pop("groups_selector")
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
278
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
279 header = "infer" if groups_selector["header_g"] else None
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
280 column_option = groups_selector["column_selector_options_g"][
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
281 "selected_column_selector_option_g"
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
282 ]
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
283 if column_option in [
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
284 "by_index_number",
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
285 "all_but_by_index_number",
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
286 "by_header_name",
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
287 "all_but_by_header_name",
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
288 ]:
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
289 c = groups_selector["column_selector_options_g"]["col_g"]
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
290 else:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
291 c = None
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
292
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
293 df_key = groups + repr(header)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
294 if df_key in loaded_df:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
295 groups = loaded_df[df_key]
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
296
31
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
297 groups = read_columns(
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
298 groups,
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
299 c=c,
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
300 c_option=column_option,
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
301 sep="\t",
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
302 header=header,
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
303 parse_dates=True,
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
304 )
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
305 groups = groups.ravel()
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
306
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
307 # del loaded_df
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
308 del loaded_df
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
309
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
310 # handle memory
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
311 memory = joblib.Memory(location=CACHE_DIR, verbose=0)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
312 # caching iraps_core fits could increase search speed significantly
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
313 if estimator.__class__.__name__ == "IRAPSClassifier":
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
314 estimator.set_params(memory=memory)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
315 else:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
316 # For iraps buried in pipeline
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
317 new_params = {}
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
318 for p, v in estimator_params.items():
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
319 if p.endswith("memory"):
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
320 # for case of `__irapsclassifier__memory`
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
321 if len(p) > 8 and p[:-8].endswith("irapsclassifier"):
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
322 # caching iraps_core fits could increase search
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
323 # speed significantly
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
324 new_params[p] = memory
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
325 # for security reasons, we don't want memory being
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
326 # modified unexpectedly
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
327 elif v:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
328 new_params[p] = None
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
329 # handle n_jobs
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
330 elif p.endswith("n_jobs"):
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
331 # For now, 1 CPU is suggested for irapsclassifier
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
332 if len(p) > 8 and p[:-8].endswith("irapsclassifier"):
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
333 new_params[p] = 1
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
334 else:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
335 new_params[p] = N_JOBS
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
336 # for security reasons, the allowed callback types are limited
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
337 elif p.endswith("callbacks"):
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
338 for cb in v:
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
339 cb_type = cb["callback_selection"]["callback_type"]
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
340 if cb_type not in ALLOWED_CALLBACKS:
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
341 raise ValueError("Prohibited callback type: %s!" % cb_type)
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
342
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
343 estimator.set_params(**new_params)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
344
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
345 # handle scorer, convert to scorer dict
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
346 # Check if scoring is specified
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
347 scoring = params["experiment_schemes"]["metrics"].get("scoring", None)
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
348 if scoring is not None:
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
349 # get_scoring() expects secondary_scoring to be a comma separated string (not a list)
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
350 # Check if secondary_scoring is specified
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
351 secondary_scoring = scoring.get("secondary_scoring", None)
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
352 if secondary_scoring is not None:
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
353 # If secondary_scoring is specified, convert the list into a comma-separated string
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
354 scoring["secondary_scoring"] = ",".join(scoring["secondary_scoring"])
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
355 scorer = get_scoring(scoring)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
356
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
357 # handle test (first) split
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
358 test_split_options = params["experiment_schemes"]["test_split"]["split_algos"]
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
359
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
360 if test_split_options["shuffle"] == "group":
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
361 test_split_options["labels"] = groups
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
362 if test_split_options["shuffle"] == "stratified":
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
363 if y is not None:
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
364 test_split_options["labels"] = y
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
365 else:
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
366 raise ValueError(
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
367 "Stratified shuffle split is not " "applicable on empty target values!"
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
368 )
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
369
31
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
370 (
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
371 X_train,
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
372 X_test,
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
373 y_train,
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
374 y_test,
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
375 groups_train,
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
376 _groups_test,
5773e98921fc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
bgruening
parents: 30
diff changeset
377 ) = train_test_split_none(X, y, groups, **test_split_options)
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
378
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
379 exp_scheme = params["experiment_schemes"]["selected_exp_scheme"]
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
380
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
381 # handle validation (second) split
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
382 if exp_scheme == "train_val_test":
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
383 val_split_options = params["experiment_schemes"]["val_split"]["split_algos"]
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
384
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
385 if val_split_options["shuffle"] == "group":
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
386 val_split_options["labels"] = groups_train
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
387 if val_split_options["shuffle"] == "stratified":
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
388 if y_train is not None:
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
389 val_split_options["labels"] = y_train
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
390 else:
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
391 raise ValueError(
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
392 "Stratified shuffle split is not "
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
393 "applicable on empty target values!"
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
394 )
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
395
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
396 (
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
397 X_train,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
398 X_val,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
399 y_train,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
400 y_val,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
401 groups_train,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
402 _groups_val,
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
403 ) = train_test_split_none(X_train, y_train, groups_train, **val_split_options)
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
404
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
405 # train and eval
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
406 if hasattr(estimator, "validation_data"):
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
407 if exp_scheme == "train_val_test":
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
408 estimator.fit(X_train, y_train, validation_data=(X_val, y_val))
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
409 else:
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
410 estimator.fit(X_train, y_train, validation_data=(X_test, y_test))
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
411 else:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
412 estimator.fit(X_train, y_train)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
413
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
414 if hasattr(estimator, "evaluate"):
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
415 scores = estimator.evaluate(
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
416 X_test, y_test=y_test, scorer=scorer, is_multimetric=True
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
417 )
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
418 else:
35
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
419 scores = _score(estimator, X_test, y_test, scorer)
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
420 # handle output
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
421 for name, score in scores.items():
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
422 scores[name] = [score]
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
423 df = pd.DataFrame(scores)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
424 df = df[sorted(df.columns)]
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
425 df.to_csv(path_or_buf=outfile_result, sep="\t", header=True, index=False)
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
426
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
427 memory.clear(warn=False)
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
428
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
429 if outfile_object:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
430 main_est = estimator
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
431 if isinstance(estimator, pipeline.Pipeline):
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
432 main_est = estimator.steps[-1][-1]
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
433
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
434 if hasattr(main_est, "model_") and hasattr(main_est, "save_weights"):
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
435 if outfile_weights:
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
436 main_est.save_weights(outfile_weights)
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
437 if getattr(main_est, "model_", None):
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
438 del main_est.model_
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
439 if getattr(main_est, "fit_params", None):
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
440 del main_est.fit_params
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
441 if getattr(main_est, "model_class_", None):
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
442 del main_est.model_class_
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
443 if getattr(main_est, "validation_data", None):
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
444 del main_est.validation_data
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
445 if getattr(main_est, "data_generator_", None):
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
446 del main_est.data_generator_
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
447
35
61edd9e5c17f planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
bgruening
parents: 31
diff changeset
448 dump_model_to_h5(estimator, outfile_object)
20
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
449
0b88494bdcac planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff changeset
450
29
93f3b307485f "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents: 25
diff changeset
if __name__ == "__main__":
    # Script entry point: parse the command-line flags and forward the
    # parsed values to main().
    aparser = argparse.ArgumentParser()

    # The only mandatory flag.
    aparser.add_argument("-i", "--inputs", dest="inputs", required=True)

    # Every remaining flag is optional and registered the same way, so they
    # are listed as (short flag, long flag, destination attribute) rows.
    # Row order matches the original registration order, which keeps the
    # generated --help listing unchanged.
    optional_flags = (
        ("-e", "--estimator", "infile_estimator"),
        ("-X", "--infile1", "infile1"),
        ("-y", "--infile2", "infile2"),
        ("-O", "--outfile_result", "outfile_result"),
        ("-o", "--outfile_object", "outfile_object"),
        ("-w", "--outfile_weights", "outfile_weights"),
        ("-g", "--groups", "groups"),
        ("-r", "--ref_seq", "ref_seq"),
        ("-b", "--intervals", "intervals"),
        ("-t", "--targets", "targets"),
        ("-f", "--fasta_path", "fasta_path"),
    )
    for short_flag, long_flag, dest_name in optional_flags:
        aparser.add_argument(short_flag, long_flag, dest=dest_name)

    args = aparser.parse_args()

    # These values are handed to main() by keyword; each keyword has the
    # same name as the parsed attribute it is read from.
    keyword_names = (
        "outfile_object",
        "outfile_weights",
        "groups",
        "ref_seq",
        "intervals",
        "targets",
        "fasta_path",
    )
    keyword_values = {name: getattr(args, name) for name in keyword_names}

    # The first five values are passed positionally, in this order.
    main(
        args.inputs,
        args.infile_estimator,
        args.infile1,
        args.infile2,
        args.outfile_result,
        **keyword_values,
    )