comparison feature_importance.py @ 7:f4cb41f458fd draft default tip

planemo upload for repository https://github.com/goeckslab/gleam commit b430f8b466655878c3bf63b053655fdbf039ddb0
author goeckslab
date Wed, 09 Jul 2025 01:13:01 +0000
parents a32ff7201629
children
comparison
equal deleted inserted replaced
6:a32ff7201629 7:f4cb41f458fd
118 # Ensure feature alignment 118 # Ensure feature alignment
119 if hasattr(model, "feature_name_"): 119 if hasattr(model, "feature_name_"):
120 used_features = model.feature_name_ 120 used_features = model.feature_name_
121 elif hasattr(model, "booster_") and hasattr(model.booster_, "feature_name"): 121 elif hasattr(model, "booster_") and hasattr(model.booster_, "feature_name"):
122 used_features = model.booster_.feature_name() 122 used_features = model.booster_.feature_name()
123 elif hasattr(model, "feature_names_in_"):
124 # scikit-learn's standard attribute for the names of features used during fit
125 used_features = list(model.feature_names_in_)
123 else: 126 else:
124 used_features = X_transformed.columns 127 used_features = X_transformed.columns
125 128
126 if any(tc in model_class_name for tc in tree_classes): 129 if any(tc in model_class_name for tc in tree_classes):
127 explainer = shap.TreeExplainer(model) 130 explainer = shap.TreeExplainer(model)
128 X_shap = X_transformed[used_features] 131 X_shap = X_transformed[used_features]
129 shap_values = explainer.shap_values(X_shap) 132 shap_values = explainer.shap_values(X_shap)
130 plot_X = X_shap 133 plot_X = X_shap
131 plot_title = f"SHAP Summary for {model_class_name} (TreeExplainer)" 134 plot_title = f"SHAP Summary for {model_class_name} (TreeExplainer)"
132 else: 135 else:
133 sampled_X = X_transformed[used_features].sample(100, random_state=42) 136 logging.warning(f"len(X_transformed) = {len(X_transformed)}")
137 max_samples = 100
138 n_samples = min(max_samples, len(X_transformed))
139 sampled_X = X_transformed[used_features].sample(
140 n=n_samples,
141 replace=False,
142 random_state=42
143 )
134 explainer = shap.KernelExplainer(model.predict, sampled_X) 144 explainer = shap.KernelExplainer(model.predict, sampled_X)
135 shap_values = explainer.shap_values(sampled_X) 145 shap_values = explainer.shap_values(sampled_X)
136 plot_X = sampled_X 146 plot_X = sampled_X
137 plot_title = f"SHAP Summary for {model_class_name} (KernelExplainer)" 147 plot_title = f"SHAP Summary for {model_class_name} (KernelExplainer)"
138 148