Mercurial > repos > goeckslab > pycaret_predict
diff feature_importance.py @ 7:f4cb41f458fd draft default tip
planemo upload for repository https://github.com/goeckslab/gleam commit b430f8b466655878c3bf63b053655fdbf039ddb0
author | goeckslab |
---|---|
date | Wed, 09 Jul 2025 01:13:01 +0000 |
parents | a32ff7201629 |
children |
line wrap: on
line diff
--- a/feature_importance.py Wed Jul 02 19:00:03 2025 +0000 +++ b/feature_importance.py Wed Jul 09 01:13:01 2025 +0000 @@ -120,6 +120,9 @@ used_features = model.feature_name_ elif hasattr(model, "booster_") and hasattr(model.booster_, "feature_name"): used_features = model.booster_.feature_name() + elif hasattr(model, "feature_names_in_"): + # scikit-learn's standard attribute for the names of features used during fit + used_features = list(model.feature_names_in_) else: used_features = X_transformed.columns @@ -130,7 +133,14 @@ plot_X = X_shap plot_title = f"SHAP Summary for {model_class_name} (TreeExplainer)" else: - sampled_X = X_transformed[used_features].sample(100, random_state=42) + logging.warning(f"len(X_transformed) = {len(X_transformed)}") + max_samples = 100 + n_samples = min(max_samples, len(X_transformed)) + sampled_X = X_transformed[used_features].sample( + n=n_samples, + replace=False, + random_state=42 + ) explainer = shap.KernelExplainer(model.predict, sampled_X) shap_values = explainer.shap_values(sampled_X) plot_X = sampled_X