Mercurial > repos > goeckslab > pycaret_predict
comparison feature_importance.py @ 7:f4cb41f458fd draft
planemo upload for repository https://github.com/goeckslab/gleam commit b430f8b466655878c3bf63b053655fdbf039ddb0
| author | goeckslab |
|---|---|
| date | Wed, 09 Jul 2025 01:13:01 +0000 |
| parents | a32ff7201629 |
| children | 1aed7d47c5ec |
comparison
equal
deleted
inserted
replaced
| 6:a32ff7201629 | 7:f4cb41f458fd |
|---|---|
| 118 # Ensure feature alignment | 118 # Ensure feature alignment |
| 119 if hasattr(model, "feature_name_"): | 119 if hasattr(model, "feature_name_"): |
| 120 used_features = model.feature_name_ | 120 used_features = model.feature_name_ |
| 121 elif hasattr(model, "booster_") and hasattr(model.booster_, "feature_name"): | 121 elif hasattr(model, "booster_") and hasattr(model.booster_, "feature_name"): |
| 122 used_features = model.booster_.feature_name() | 122 used_features = model.booster_.feature_name() |
| 123 elif hasattr(model, "feature_names_in_"): | |
| 124 # scikit-learn's standard attribute for the names of features used during fit | |
| 125 used_features = list(model.feature_names_in_) | |
| 123 else: | 126 else: |
| 124 used_features = X_transformed.columns | 127 used_features = X_transformed.columns |
| 125 | 128 |
| 126 if any(tc in model_class_name for tc in tree_classes): | 129 if any(tc in model_class_name for tc in tree_classes): |
| 127 explainer = shap.TreeExplainer(model) | 130 explainer = shap.TreeExplainer(model) |
| 128 X_shap = X_transformed[used_features] | 131 X_shap = X_transformed[used_features] |
| 129 shap_values = explainer.shap_values(X_shap) | 132 shap_values = explainer.shap_values(X_shap) |
| 130 plot_X = X_shap | 133 plot_X = X_shap |
| 131 plot_title = f"SHAP Summary for {model_class_name} (TreeExplainer)" | 134 plot_title = f"SHAP Summary for {model_class_name} (TreeExplainer)" |
| 132 else: | 135 else: |
| 133 sampled_X = X_transformed[used_features].sample(100, random_state=42) | 136 logging.warning(f"len(X_transformed) = {len(X_transformed)}") |
| 137 max_samples = 100 | |
| 138 n_samples = min(max_samples, len(X_transformed)) | |
| 139 sampled_X = X_transformed[used_features].sample( | |
| 140 n=n_samples, | |
| 141 replace=False, | |
| 142 random_state=42 | |
| 143 ) | |
| 134 explainer = shap.KernelExplainer(model.predict, sampled_X) | 144 explainer = shap.KernelExplainer(model.predict, sampled_X) |
| 135 shap_values = explainer.shap_values(sampled_X) | 145 shap_values = explainer.shap_values(sampled_X) |
| 136 plot_X = sampled_X | 146 plot_X = sampled_X |
| 137 plot_title = f"SHAP Summary for {model_class_name} (KernelExplainer)" | 147 plot_title = f"SHAP Summary for {model_class_name} (KernelExplainer)" |
| 138 | 148 |
