Mercurial > repos > goeckslab > pycaret_predict
annotate feature_importance.py @ 16:4fee4504646e draft default tip
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
| author | goeckslab |
|---|---|
| date | Fri, 28 Nov 2025 22:28:26 +0000 |
| parents | e674b9e946fb |
| children |
| rev | line source |
|---|---|
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
1 import base64 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
2 import logging |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
3 import os |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
4 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
5 import matplotlib.pyplot as plt |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
6 import pandas as pd |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
7 import shap |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
8 from pycaret.classification import ClassificationExperiment |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
9 from pycaret.regression import RegressionExperiment |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
10 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
# Configure the root logger when this module is imported, with the threshold
# set to DEBUG.
logging.basicConfig(level=logging.DEBUG)
# Module-level logger, named after this module, used throughout the file.
LOG = logging.getLogger(__name__)
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
13 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
14 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
15 class FeatureImportanceAnalyzer: |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def __init__(
    self,
    task_type,
    output_dir,
    data_path=None,
    data=None,
    target_col=None,
    exp=None,
    best_model=None,
    max_plot_features=None,
    processed_data=None,
    max_shap_rows=None,
):
    """Gather the data, target and experiment used by the analyzer.

    The data source is resolved in this order: an existing experiment
    (``exp``), an in-memory frame (``data``), then a delimited file
    (``data_path``).

    Args:
        task_type: ``"classification"`` selects a ClassificationExperiment;
            any other value selects a RegressionExperiment. Only consulted
            when ``exp`` is not supplied.
        output_dir: Stored on the instance as ``output_dir``.
        data_path: File read with ``pd.read_csv`` (delimiter sniffed) when
            neither ``exp`` nor ``data`` is given. Dots in column names are
            replaced by underscores and missing numeric values are filled
            with the column median.
        data: In-memory frame used as-is when ``exp`` is not given.
        target_col: 1-based position of the target column; required when
            ``exp`` is not given.
        exp: Existing experiment; its ``dataset`` is copied and its
            ``target_param`` becomes the target.
        best_model: Stored on the instance as ``best_model``.
        max_plot_features: A positive int is kept, ``None`` becomes 30 and
            any other value becomes ``None``.
        processed_data: When given, replaces ``self.data`` whatever the
            source was.
        max_shap_rows: Stored on the instance as ``max_shap_rows``.

    Raises:
        ValueError: If none of ``exp``, ``data`` or ``data_path`` is given.
        IndexError: If ``target_col`` does not address an existing column.
    """
    self.task_type = task_type
    self.output_dir = output_dir
    self.exp = exp
    self.best_model = best_model
    # Always defined so the attribute exists whichever branch runs below.
    self.target_col = target_col
    self._skip_messages = []
    self.shap_total_features = None
    self.shap_used_features = None
    # save_tree_importance() assigns this later; None is the value it uses
    # when no importance plot could be produced.
    self.tree_model_name = None

    if isinstance(max_plot_features, int) and max_plot_features > 0:
        self.max_plot_features = max_plot_features
    elif max_plot_features is None:
        self.max_plot_features = 30
    else:
        self.max_plot_features = None

    if exp is not None:
        # Assume all configs (data, target) are in exp
        self.data = exp.dataset.copy()
        self.target = exp.target_param
        LOG.info("Using provided experiment object")
    else:
        if data is not None:
            self.data = data
            LOG.info("Data loaded from memory")
        elif data_path is not None:
            frame = pd.read_csv(data_path, sep=None, engine="python")
            # regex=False: replace literal dots only. The default of
            # ``regex`` differs between pandas versions, and as a regex
            # "." would match every character.
            frame.columns = frame.columns.str.replace(".", "_", regex=False)
            self.data = frame.fillna(frame.median(numeric_only=True))
        else:
            raise ValueError(
                "One of 'exp', 'data' or 'data_path' must be provided."
            )

        target_index = int(target_col) - 1
        if target_index < 0:
            # A negative index would silently pick a column counted from
            # the end instead of reporting the bad 1-based position.
            raise IndexError(
                f"target_col must be a 1-based column position, got {target_col!r}"
            )
        self.target = self.data.columns[target_index]
        self.exp = (
            ClassificationExperiment()
            if task_type == "classification"
            else RegressionExperiment()
        )

    if processed_data is not None:
        self.data = processed_data

    self.plots = {}
    self.max_shap_rows = max_shap_rows
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
68 |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
69 def _get_feature_names_from_model(self, model): |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
70 """Best-effort extraction of feature names seen by the estimator.""" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
71 if model is None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
72 return None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
73 |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
74 candidates = [model] |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
75 if hasattr(model, "named_steps"): |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
76 candidates.extend(model.named_steps.values()) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
77 elif hasattr(model, "steps"): |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
78 candidates.extend(step for _, step in model.steps) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
79 |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
80 for candidate in candidates: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
81 names = getattr(candidate, "feature_names_in_", None) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
82 if names is not None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
83 return list(names) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
84 return None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
85 |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
86 def _get_transformed_frame(self, model=None, prefer_test=True): |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
87 """Return a DataFrame that mirrors the matrix fed to the estimator.""" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
88 key_order = ["X_test_transformed", "X_train_transformed"] |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
89 if not prefer_test: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
90 key_order.reverse() |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
91 key_order.append("X_transformed") |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
92 |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
93 feature_names = self._get_feature_names_from_model(model) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
94 for key in key_order: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
95 try: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
96 frame = self.exp.get_config(key) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
97 except KeyError: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
98 continue |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
99 if frame is None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
100 continue |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
101 if isinstance(frame, pd.DataFrame): |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
102 return frame.copy() |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
103 try: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
104 n_features = frame.shape[1] |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
105 except Exception: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
106 continue |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
107 if feature_names and len(feature_names) == n_features: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
108 return pd.DataFrame(frame, columns=feature_names) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
109 # Fallback to positional names so downstream logic still works |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
110 return pd.DataFrame(frame, columns=[f"f{i}" for i in range(n_features)]) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
111 return None |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
112 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def setup_pycaret(self):
    """Call ``setup`` on the experiment unless it reports being set up.

    When ``self.exp`` has a truthy ``is_setup`` attribute the call is
    skipped. Otherwise ``self.exp.setup`` is invoked with ``self.data``,
    ``self.target`` as the target, a fixed ``session_id`` of 123, and
    experiment and system logging switched off.
    """
    # NOTE(review): confirm the experiment classes actually expose
    # ``is_setup``; if they do not, setup always runs.
    already_configured = getattr(self.exp, "is_setup", False)
    if already_configured:
        LOG.info("Experiment already set up. Skipping PyCaret setup.")
        return

    LOG.info("Initializing PyCaret")
    self.exp.setup(
        self.data,
        target=self.target,
        session_id=123,
        html=True,
        log_experiment=False,
        system_log=False,
    )
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
126 |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
127 def save_tree_importance(self): |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
128 model = self.best_model or self.exp.get_config("best_model") |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
129 processed_frame = self._get_transformed_frame(model, prefer_test=False) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
130 if processed_frame is None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
131 LOG.warning( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
132 "Unable to determine transformed feature names; skipping tree importance plot." |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
133 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
134 self.tree_model_name = None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
135 return |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
136 processed_features = list(processed_frame.columns) |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
137 |
|
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
138 importances = None |
|
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
139 model_type = model.__class__.__name__ |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
140 self.tree_model_name = model_type |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
141 |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
142 if hasattr(model, "feature_importances_"): |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
143 importances = model.feature_importances_ |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
144 elif hasattr(model, "coef_"): |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
145 importances = abs(model.coef_).flatten() |
|
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
146 else: |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
147 LOG.warning( |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
148 f"Model {model_type} does not have feature_importances_ or coef_. Skipping tree importance." |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
149 ) |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
150 self.tree_model_name = None |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
151 return |
|
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
152 |
|
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
153 if len(importances) != len(processed_features): |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
154 model_feature_names = self._get_feature_names_from_model(model) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
155 if model_feature_names and len(model_feature_names) == len(importances): |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
156 processed_features = model_feature_names |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
157 else: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
158 LOG.warning( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
159 "Importances (%s) != features (%s). Skipping tree importance.", |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
160 len(importances), |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
161 len(processed_features), |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
162 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
163 self.tree_model_name = None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
164 return |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
165 |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
166 feature_importances = pd.DataFrame( |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
167 {"Feature": processed_features, "Importance": importances} |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
168 ).sort_values(by="Importance", ascending=False) |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
169 cap = ( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
170 min(self.max_plot_features, len(feature_importances)) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
171 if self.max_plot_features is not None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
172 else len(feature_importances) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
173 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
174 plot_importances = feature_importances.head(cap) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
175 if cap < len(feature_importances): |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
176 LOG.info( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
177 "Tree importance plot limited to top %s of %s features", |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
178 cap, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
179 len(feature_importances), |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
180 ) |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
181 plt.figure(figsize=(10, 6)) |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
182 plt.barh( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
183 plot_importances["Feature"], |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
184 plot_importances["Importance"], |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
185 ) |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
186 plt.xlabel("Importance") |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
187 plt.title(f"Feature Importance ({model_type}) (top {cap})") |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
188 plot_path = os.path.join(self.output_dir, "tree_importance.png") |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
189 plt.tight_layout() |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
190 plt.savefig(plot_path, bbox_inches="tight") |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
191 plt.close() |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
192 self.plots["tree_importance"] = plot_path |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
193 |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
194 def save_shap_values(self, max_samples=None, max_display=None, max_features=None): |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
195 model = self.best_model or self.exp.get_config("best_model") |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
196 |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
197 X_data = self._get_transformed_frame(model) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
198 if X_data is None: |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
199 raise RuntimeError("No transformed dataset found for SHAP.") |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
200 |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
201 n_rows, n_features = X_data.shape |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
202 self.shap_total_features = n_features |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
203 feature_cap = ( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
204 min(self.max_plot_features, n_features) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
205 if self.max_plot_features is not None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
206 else n_features |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
207 ) |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
208 if max_features is None: |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
209 max_features = feature_cap |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
210 else: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
211 max_features = min(max_features, feature_cap) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
212 display_features = list(X_data.columns) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
213 |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
214 try: |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
215 if hasattr(model, "feature_importances_"): |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
216 importances = pd.Series( |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
217 model.feature_importances_, index=X_data.columns |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
218 ) |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
219 top_features = importances.nlargest(max_features).index |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
220 elif hasattr(model, "coef_"): |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
221 coef = abs(model.coef_).flatten() |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
222 importances = pd.Series(coef, index=X_data.columns) |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
223 top_features = importances.nlargest(max_features).index |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
224 else: |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
225 variances = X_data.var() |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
226 top_features = variances.nlargest(max_features).index |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
227 |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
228 candidate_features = list(top_features) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
229 missing = [f for f in candidate_features if f not in X_data.columns] |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
230 display_features = [f for f in candidate_features if f in X_data.columns] |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
231 if missing: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
232 LOG.warning( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
233 "Dropping %s transformed feature(s) not present in SHAP frame: %s", |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
234 len(missing), |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
235 missing[:5], |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
236 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
237 if display_features and len(display_features) < n_features: |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
238 LOG.info( |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
239 "Restricting SHAP display to top %s of %s features", |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
240 len(display_features), |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
241 n_features, |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
242 ) |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
243 elif not display_features: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
244 display_features = list(X_data.columns) |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
245 except Exception as e: |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
246 LOG.warning( |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
247 f"Feature limiting failed: {e}. Using all {n_features} features." |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
248 ) |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
249 display_features = list(X_data.columns) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
250 |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
251 self.shap_used_features = len(display_features) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
252 |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
253 # Apply the column restriction so SHAP only runs on the selected features. |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
254 if display_features: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
255 X_data = X_data[display_features] |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
256 n_rows, n_features = X_data.shape |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
257 |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
258 # --- Adaptive row subsampling --- |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
259 if max_samples is None: |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
260 if n_rows <= 500: |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
261 max_samples = n_rows |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
262 elif n_rows <= 5000: |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
263 max_samples = 500 |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
264 else: |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
265 max_samples = min(1000, int(n_rows * 0.1)) |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
266 |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
267 if self.max_shap_rows is not None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
268 max_samples = min(max_samples, self.max_shap_rows) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
269 |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
270 if n_rows > max_samples: |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
271 LOG.info(f"Subsampling SHAP rows: {max_samples} of {n_rows}") |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
272 X_data = X_data.sample(max_samples, random_state=42) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
273 |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
274 # --- Adaptive feature display --- |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
275 display_cap = ( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
276 min(self.max_plot_features, len(display_features)) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
277 if self.max_plot_features is not None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
278 else len(display_features) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
279 ) |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
280 if max_display is None: |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
281 max_display = display_cap |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
282 else: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
283 max_display = min(max_display, display_cap) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
284 if not display_features: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
285 display_features = list(X_data.columns) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
286 max_display = len(display_features) |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
287 |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
288 # Background set |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
289 bg = X_data.sample(min(len(X_data), 100), random_state=42) |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
290 predict_fn = ( |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
291 model.predict_proba if hasattr(model, "predict_proba") else model.predict |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
292 ) |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
293 |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
294 # Optimized explainer |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
295 explainer = None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
296 explainer_label = None |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
297 if hasattr(model, "feature_importances_"): |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
298 explainer = shap.TreeExplainer( |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
299 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1 |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
300 ) |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
301 explainer_label = "tree_path_dependent" |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
302 elif hasattr(model, "coef_"): |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
303 explainer = shap.LinearExplainer(model, bg) |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
304 explainer_label = "linear" |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
305 else: |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
306 explainer = shap.Explainer(predict_fn, bg) |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
307 explainer_label = explainer.__class__.__name__ |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
308 |
|
11
4eca9d109de1
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
8
diff
changeset
|
309 try: |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
310 shap_values = explainer(X_data) |
|
11
4eca9d109de1
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
8
diff
changeset
|
311 self.shap_model_name = explainer.__class__.__name__ |
|
4eca9d109de1
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
8
diff
changeset
|
312 except Exception as e: |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
313 error_message = str(e) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
314 needs_tree_fallback = ( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
315 hasattr(model, "feature_importances_") |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
316 and "does not cover all the leaves" in error_message.lower() |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
317 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
318 feature_name_mismatch = "feature names should match" in error_message.lower() |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
319 if needs_tree_fallback: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
320 LOG.warning( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
321 "SHAP computation failed using '%s' perturbation (%s). " |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
322 "Retrying with interventional perturbation.", |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
323 explainer_label, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
324 error_message, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
325 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
326 try: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
327 explainer = shap.TreeExplainer( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
328 model, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
329 bg, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
330 feature_perturbation="interventional", |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
331 n_jobs=-1, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
332 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
333 shap_values = explainer(X_data) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
334 self.shap_model_name = ( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
335 f"{explainer.__class__.__name__} (interventional)" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
336 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
337 except Exception as retry_exc: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
338 LOG.error( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
339 "SHAP computation failed even after fallback: %s", |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
340 retry_exc, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
341 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
342 self.shap_model_name = None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
343 return |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
344 elif feature_name_mismatch: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
345 LOG.warning( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
346 "SHAP computation failed due to feature-name mismatch (%s). " |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
347 "Falling back to model-agnostic SHAP explainer.", |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
348 error_message, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
349 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
350 try: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
351 agnostic_explainer = shap.Explainer(predict_fn, bg) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
352 shap_values = agnostic_explainer(X_data) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
353 self.shap_model_name = ( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
354 f"{agnostic_explainer.__class__.__name__} (fallback)" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
355 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
356 except Exception as fallback_exc: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
357 LOG.error( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
358 "Model-agnostic SHAP fallback also failed: %s", |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
359 fallback_exc, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
360 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
361 self.shap_model_name = None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
362 return |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
363 else: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
364 LOG.error(f"SHAP computation failed: {e}") |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
365 self.shap_model_name = None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
366 return |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
367 |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
def _limit_explanation_features(explanation):
    """Return ``explanation`` restricted to the ``display_features`` columns.

    ``display_features`` and ``n_features`` come from the enclosing scope.
    When the display list already has at least ``n_features`` entries the
    explanation is returned untouched. If the column selection raises, a
    warning is logged and the full explanation is returned so plotting can
    still proceed.
    """
    shown = len(display_features)
    if shown >= n_features:
        return explanation
    try:
        trimmed = explanation[:, display_features]
        LOG.info(
            "SHAP explanation trimmed to %s display features.",
            shown,
        )
    except Exception as err:
        # Best effort only: fall back to the full feature list if trimming fails.
        LOG.warning(
            "Failed to restrict SHAP explanation to top features "
            "(sample=%s); plot will include all features. Error: %s",
            display_features[:5],
            err,
        )
        return explanation
    return trimmed
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
387 |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
388 shap_shape = getattr(shap_values, "shape", None) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
389 class_labels = list(getattr(model, "classes_", [])) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
390 shap_outputs = [] |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
391 if shap_shape is not None and len(shap_shape) == 3: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
392 output_count = shap_shape[2] |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
393 LOG.info("Detected multi-output SHAP explanation with %s classes.", output_count) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
394 for class_idx in range(output_count): |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
395 try: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
396 class_expl = shap_values[..., class_idx] |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
397 except Exception as exc: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
398 LOG.warning( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
399 "Failed to extract SHAP explanation for class index %s: %s", |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
400 class_idx, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
401 exc, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
402 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
403 continue |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
404 label = ( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
405 class_labels[class_idx] |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
406 if class_labels and class_idx < len(class_labels) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
407 else class_idx |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
408 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
409 shap_outputs.append((class_idx, label, class_expl)) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
410 else: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
411 shap_outputs.append((None, None, shap_values)) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
412 |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
413 if not shap_outputs: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
414 LOG.error("No SHAP outputs available for plotting.") |
|
11
4eca9d109de1
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
8
diff
changeset
|
415 self.shap_model_name = None |
|
4eca9d109de1
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
8
diff
changeset
|
416 return |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
417 |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
418 # --- Plot SHAP summary (one per class if needed) --- |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
419 for class_idx, class_label, class_expl in shap_outputs: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
420 expl_to_plot = _limit_explanation_features(class_expl) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
421 suffix = "" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
422 plot_key = "shap_summary" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
423 if class_idx is not None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
424 safe_label = str(class_label).replace("/", "_").replace(" ", "_") |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
425 suffix = f"_class_{safe_label}" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
426 plot_key = f"shap_summary_class_{safe_label}" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
427 out_filename = f"shap_summary{suffix}.png" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
428 out_path = os.path.join(self.output_dir, out_filename) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
429 plt.figure() |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
430 shap.plots.beeswarm(expl_to_plot, max_display=max_display, show=False) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
431 title = f"SHAP Summary for {model.__class__.__name__}" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
432 if class_idx is not None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
433 title += f" (class {class_label})" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
434 plt.title(f"{title} (top {max_display} features)") |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
435 plt.tight_layout() |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
436 plt.savefig(out_path, bbox_inches="tight") |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
437 plt.close() |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
438 self.plots[plot_key] = out_path |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
439 |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
440 # --- Log summary --- |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
441 LOG.info( |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
442 "SHAP summary completed with %s rows and %s features " |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
443 "(displaying top %s) across %s output(s).", |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
444 X_data.shape[0], |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
445 X_data.shape[1], |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
446 max_display, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
12
diff
changeset
|
447 len(shap_outputs), |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
448 ) |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
449 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def generate_html_report(self):
    """Build the HTML fragment that embeds every generated plot.

    Iterates over ``self.plots`` (plot name -> image path), base64-encodes
    each image via ``self.encode_image_to_base64`` and wraps it in a
    ``<div class="plot">`` section with a heading.

    Headings are chosen per plot name:
      * ``tree_importance``      -> "Feature importance from <tree_model_name>";
        the plot is skipped when ``tree_model_name`` is missing or falsy.
      * ``shap_summary``         -> "SHAP Summary from <shap_model_name>".
      * ``shap_summary_class_X`` -> "SHAP Summary for class X (<shap_model_name>)".
      * anything else            -> the plot name itself.

    A missing or ``None`` ``shap_model_name`` is rendered as "model".

    Returns:
        str: the concatenated HTML sections (empty string when there are
        no plots to show).
    """
    # Local import: the file's top-level import block is outside this block.
    import html

    LOG.info("Generating HTML report")

    shap_class_prefix = "shap_summary_class_"
    tree_model_name = getattr(self, "tree_model_name", None)
    # ``or`` also covers the attribute being present but set to None, which
    # would otherwise be rendered literally as "None" in the heading.
    shap_model_name = getattr(self, "shap_model_name", None) or "model"

    sections = []
    for plot_name, plot_path in self.plots.items():
        if plot_name == "tree_importance":
            if not tree_model_name:
                # No tree model was recorded, so there is nothing to title.
                continue
            section_title = f"Feature importance from {tree_model_name}"
        elif plot_name == "shap_summary":
            section_title = f"SHAP Summary from {shap_model_name}"
        elif plot_name.startswith(shap_class_prefix):
            # Strip only the leading prefix; a class label may itself
            # contain the prefix text.
            class_label = plot_name[len(shap_class_prefix):]
            section_title = (
                f"SHAP Summary for class {class_label} "
                f"({shap_model_name})"
            )
        else:
            section_title = plot_name

        encoded_image = self.encode_image_to_base64(plot_path)

        # Plot names and titles can carry data-derived class labels and
        # model names; escape them before placing them in markup.
        safe_name = html.escape(str(plot_name), quote=True)
        safe_title = html.escape(str(section_title), quote=True)

        sections.append(f"""
        <div class="plot" id="{safe_name}" style="text-align:center;margin-bottom:24px;">
            <h2>{safe_title}</h2>
            <img src="data:image/png;base64,{encoded_image}" alt="{safe_name}"
                 style="max-width:95%;height:auto;display:block;margin:0 auto;border:1px solid #ddd;padding:8px;background:#fff;">
        </div>
        """)
    return "".join(sections)
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
483 |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
484 def encode_image_to_base64(self, img_path): |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
485 with open(img_path, "rb") as img_file: |
|
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
486 return base64.b64encode(img_file.read()).decode("utf-8") |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
487 |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
488 def run(self): |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
489 if ( |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
490 self.exp is None |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
491 or not hasattr(self.exp, "is_setup") |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
492 or not self.exp.is_setup |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
493 ): |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
494 self.setup_pycaret() |
|
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
495 self.save_tree_importance() |
|
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
496 self.save_shap_values() |
|
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
497 return self.generate_html_report() |
