# HG changeset patch
# User goeckslab
# Date 1733887740 0
# Node ID 1bc26b9636d2d4429809fe66ffbbd515303c49cd
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit 5089a5dffc154c8202624cfbd5f1be0f36a9f0cc
diff -r 000000000000 -r 1bc26b9636d2 base_model_trainer.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/base_model_trainer.py	Wed Dec 11 03:29:00 2024 +0000
@@ -0,0 +1,359 @@
+import base64
+import logging
+import os
+import tempfile
+
+from feature_importance import FeatureImportanceAnalyzer
+
+import h5py
+
+import joblib
+
+import numpy as np
+
+import pandas as pd
+
+from sklearn.metrics import average_precision_score
+
+from utils import get_html_closing, get_html_template
+
+logging.basicConfig(level=logging.DEBUG)
+LOG = logging.getLogger(__name__)
+
+
+class BaseModelTrainer:
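+    """Shared scaffolding for the PyCaret tool wrappers.
+
+    Subclasses set ``self.exp`` to a concrete PyCaret experiment
+    (classification or regression) and implement the plot hooks.
+    """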
+
+    def __init__(
+            self,
+            input_file,
+            target_col,
+            output_dir,
+            task_type,
+            random_seed,
+            test_file=None,
+            **kwargs
+            ):
+        self.exp = None  # This will be set in the subclass
+        self.input_file = input_file
+        self.target_col = target_col
+        self.output_dir = output_dir
+        self.task_type = task_type
+        self.random_seed = random_seed
+        self.data = None
+        self.target = None
+        self.best_model = None
+        self.results = None
+        self.features_name = None
+        self.plots = {}
+        self.explainer = None
+        self.plots_explainer_html = None
+        self.trees = []
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+        self.setup_params = {}
+        self.test_file = test_file
+        self.test_data = None
+
+        LOG.info(f"Model kwargs: {self.__dict__}")
+
+    def load_data(self):
+        LOG.info(f"Loading data from {self.input_file}")
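+        # sep=None with the python engine lets pandas sniff the delimiter,
+        # so both comma- and tab-separated inputs load correctly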
+        self.data = pd.read_csv(self.input_file, sep=None, engine='python')
+        self.data.columns = self.data.columns.str.replace('.', '_')
+
+        numeric_cols = self.data.select_dtypes(include=['number']).columns
+        non_numeric_cols = self.data.select_dtypes(exclude=['number']).columns
+
+        self.data[numeric_cols] = self.data[numeric_cols].apply(
+            pd.to_numeric, errors='coerce')
+
+        if len(non_numeric_cols) > 0:
+            LOG.info(f"Non-numeric columns found: {non_numeric_cols.tolist()}")
+
+        names = self.data.columns.to_list()
+        target_index = int(self.target_col)-1
+        self.target = names[target_index]
+        self.features_name = [name
+                              for i, name in enumerate(names)
+                              if i != target_index]
+        if hasattr(self, 'missing_value_strategy'):
+            if self.missing_value_strategy == 'mean':
+                self.data = self.data.fillna(
+                    self.data.mean(numeric_only=True))
+            elif self.missing_value_strategy == 'median':
+                self.data = self.data.fillna(
+                    self.data.median(numeric_only=True))
+            elif self.missing_value_strategy == 'drop':
+                self.data = self.data.dropna()
+        else:
+            # Default strategy if not specified
+            self.data = self.data.fillna(self.data.median(numeric_only=True))
+
+        if self.test_file:
+            LOG.info(f"Loading test data from {self.test_file}")
+            self.test_data = pd.read_csv(
+                self.test_file, sep=None, engine='python')
+            self.test_data.columns = self.test_data.columns.str.replace(
+                '.', '_')
+            # Coerce the numeric columns in place (instead of selecting
+            # only them) so non-numeric columns are kept for PyCaret
+            self.test_data[numeric_cols] = self.test_data[numeric_cols].apply(
+                pd.to_numeric, errors='coerce')
+
+    def setup_pycaret(self):
+        LOG.info("Initializing PyCaret")
+        self.setup_params = {
+            'target': self.target,
+            'session_id': self.random_seed,
+            'html': True,
+            'log_experiment': False,
+            'system_log': False,
+            'index': False,
+        }
+
+        if self.test_data is not None:
+            self.setup_params['test_data'] = self.test_data
+
+        if hasattr(self, 'train_size') and self.train_size is not None \
+                and self.test_data is None:
+            self.setup_params['train_size'] = self.train_size
+
+        if hasattr(self, 'normalize') and self.normalize is not None:
+            self.setup_params['normalize'] = self.normalize
+
+        if hasattr(self, 'feature_selection') and \
+                self.feature_selection is not None:
+            self.setup_params['feature_selection'] = self.feature_selection
+
+        if hasattr(self, 'cross_validation') and \
+                self.cross_validation is not None:
+            if self.cross_validation is False:
+                self.setup_params['cross_validation'] = False
+            if hasattr(self, 'cross_validation_folds'):
+                self.setup_params['fold'] = self.cross_validation_folds
+
+        if hasattr(self, 'remove_outliers') and \
+                self.remove_outliers is not None:
+            self.setup_params['remove_outliers'] = self.remove_outliers
+
+        if hasattr(self, 'remove_multicollinearity') and \
+                self.remove_multicollinearity is not None:
+            self.setup_params['remove_multicollinearity'] = \
+                self.remove_multicollinearity
+
+        if hasattr(self, 'polynomial_features') and \
+                self.polynomial_features is not None:
+            self.setup_params['polynomial_features'] = \
+                self.polynomial_features
+
+        if hasattr(self, 'fix_imbalance') and \
+                self.fix_imbalance is not None:
+            self.setup_params['fix_imbalance'] = self.fix_imbalance
+
+        LOG.info(self.setup_params)
+        self.exp.setup(self.data, **self.setup_params)
+
+    def train_model(self):
+        LOG.info("Training and selecting the best model")
+        if self.task_type == "classification":
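+            # PyCaret does not ship a PR-AUC metric, so register sklearn's
+            # average_precision_score against the predicted probabilities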
+            average_displayed = "Weighted"
+            self.exp.add_metric(id=f'PR-AUC-{average_displayed}',
+                                name=f'PR-AUC-{average_displayed}',
+                                target='pred_proba',
+                                score_func=average_precision_score,
+                                average='weighted'
+                                )
+
+        if hasattr(self, 'models') and self.models is not None:
+            self.best_model = self.exp.compare_models(
+                include=self.models)
+        else:
+            self.best_model = self.exp.compare_models()
+        self.results = self.exp.pull()
+        if self.task_type == "classification":
+            self.results.rename(columns={'AUC': 'ROC-AUC'}, inplace=True)
+
+        _ = self.exp.predict_model(self.best_model)
+        self.test_result_df = self.exp.pull()
+        if self.task_type == "classification":
+            self.test_result_df.rename(
+                columns={'AUC': 'ROC-AUC'}, inplace=True)
+
+    def save_model(self):
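+        # Serialize the model with joblib and embed the raw bytes in an
+        # HDF5 dataset so the artifact ships as a single h5 file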
+        hdf5_model_path = "pycaret_model.h5"
+        with h5py.File(hdf5_model_path, 'w') as f:
+            with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+                joblib.dump(self.best_model, temp_file.name)
+                temp_file.seek(0)
+                model_bytes = temp_file.read()
+            os.remove(temp_file.name)  # clean up the temporary file
+            f.create_dataset('model', data=np.void(model_bytes))
+
+    def generate_plots(self):
+        raise NotImplementedError("Subclasses should implement this method")
+
+    def encode_image_to_base64(self, img_path):
+        with open(img_path, 'rb') as img_file:
+            return base64.b64encode(img_file.read()).decode('utf-8')
+
+    def save_html_report(self):
+        LOG.info("Saving HTML report")
+
+        model_name = type(self.best_model).__name__
+        excluded_params = ['html', 'log_experiment', 'system_log', 'test_data']
+        filtered_setup_params = {
+            k: v
+            for k, v in self.setup_params.items() if k not in excluded_params
+        }
+        setup_params_table = pd.DataFrame(
+            list(filtered_setup_params.items()),
+            columns=['Parameter', 'Value'])
+
+        best_model_params = pd.DataFrame(
+            self.best_model.get_params().items(),
+            columns=['Parameter', 'Value'])
+        best_model_params.to_csv(
+            os.path.join(self.output_dir, 'best_model.csv'),
+            index=False)
+        self.results.to_csv(os.path.join(
+            self.output_dir, "comparison_results.csv"))
+        self.test_result_df.to_csv(os.path.join(
+            self.output_dir, "test_results.csv"))
+
+        plots_html = ""
+        length = len(self.plots)
+        for i, (plot_name, plot_path) in enumerate(self.plots.items()):
+            encoded_image = self.encode_image_to_base64(plot_path)
+            plots_html += f"""
+            <div class="plot">
+                <h3>{plot_name.capitalize()}</h3>
+                <img src="data:image/png;base64,{encoded_image}"
+                    alt="{plot_name}">
+            </div>
+            """
+            if i < length - 1:
+                plots_html += "<hr>"
+
+        tree_plots = ""
+        for i, tree in enumerate(self.trees):
+            if tree:
+                tree_plots += f"""
+                <div class="plot">
+                    <h3>Tree {i + 1}</h3>
+                    <img src="data:image/png;base64,{tree}"
+                        alt="tree {i + 1}">
+                </div>
+                """
+
+        analyzer = FeatureImportanceAnalyzer(
+            data=self.data,
+            target_col=self.target_col,
+            task_type=self.task_type,
+            output_dir=self.output_dir)
+        feature_importance_html = analyzer.run()
+
+        html_content = f"""
+        {get_html_template()}
+            <h1>PyCaret Model Training Report</h1>
+            <div class="tabs">
+                <div class="tab" onclick="openTab(event, 'summary')">
+                    Setup & Best Model</div>
+                <div class="tab" onclick="openTab(event, 'plots')">
+                    Best Model Plots</div>
+                <div class="tab" onclick="openTab(event, 'feature')">
+                    Feature Importance</div>
+                <div class="tab" onclick="openTab(event, 'explainer')">
+                    Explainer</div>
+            </div>
+            <div id="summary" class="tab-content">
+                <h2>Setup Parameters</h2>
+                <table>
+                    <tr><th>Parameter</th><th>Value</th></tr>
+                    {setup_params_table.to_html(
+                        index=False, header=False, classes='table')}
+                </table>
+                <p>If you want to know all the experiment setup parameters,
+                  please check the PyCaret documentation for
+                  the classification/regression exp function.</p>
+                <h2>Best Model: {model_name}</h2>
+                <table>
+                    <tr><th>Parameter</th><th>Value</th></tr>
+                    {best_model_params.to_html(
+                        index=False, header=False, classes='table')}
+                </table>
+                <h2>Comparison Results on the Cross-Validation Set</h2>
+                <table>
+                    {self.results.to_html(index=False, classes='table')}
+                </table>
+                <h2>Results on the Test Set for the best model</h2>
+                <table>
+                    {self.test_result_df.to_html(index=False, classes='table')}
+                </table>
+            </div>
+            <div id="plots" class="tab-content">
+                <h2>Best Model Plots on the testing set</h2>
+                {plots_html}
+            </div>
+            <div id="feature" class="tab-content">
+                {feature_importance_html}
+            </div>
+            <div id="explainer" class="tab-content">
+                {self.plots_explainer_html}
+                {tree_plots}
+            </div>
+        {get_html_closing()}
+        """
+
+        with open(os.path.join(
+                self.output_dir, "comparison_result.html"), "w") as file:
+            file.write(html_content)
+
+    def save_dashboard(self):
+        raise NotImplementedError("Subclasses should implement this method")
+
+    def generate_plots_explainer(self):
+        raise NotImplementedError("Subclasses should implement this method")
+
+    # not working now
+    def generate_tree_plots(self):
+        from sklearn.ensemble import RandomForestClassifier, \
+            RandomForestRegressor
+        from xgboost import XGBClassifier, XGBRegressor
+        from explainerdashboard.explainers import RandomForestExplainer
+
+        LOG.info("Generating tree plots")
+        X_test = self.exp.X_test_transformed.copy()
+        y_test = self.exp.y_test_transformed
+
+        is_rf = isinstance(
+            self.best_model, (RandomForestClassifier, RandomForestRegressor))
+        is_xgb = isinstance(
+            self.best_model, (XGBClassifier, XGBRegressor))
+
+        try:
+            if is_rf:
+                num_trees = self.best_model.n_estimators
+            elif is_xgb:
+                num_trees = len(self.best_model.get_booster().get_dump())
+            else:
+                LOG.warning("Tree plots are only generated for Random Forest "
+                            "and XGBoost models")
+                return
+            explainer = RandomForestExplainer(self.best_model, X_test, y_test)
+            for i in range(num_trees):
+                fig = explainer.decisiontree_encoded(tree_idx=i, index=0)
+                LOG.info(f"Tree {i+1}")
+                LOG.info(fig)
+                self.trees.append(fig)
+        except Exception as e:
+            LOG.error(f"Error generating tree plots: {e}")
+
+    def run(self):
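+        # End-to-end pipeline: load data, configure PyCaret, compare models,
+        # persist the best one, and render the HTML report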
+        self.load_data()
+        self.setup_pycaret()
+        self.train_model()
+        self.save_model()
+        self.generate_plots()
+        self.generate_plots_explainer()
+        self.generate_tree_plots()
+        self.save_html_report()
+        # self.save_dashboard()
diff -r 000000000000 -r 1bc26b9636d2 dashboard.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dashboard.py	Wed Dec 11 03:29:00 2024 +0000
@@ -0,0 +1,159 @@
+import logging
+from typing import Any, Dict, Optional
+
+from pycaret.utils.generic import get_label_encoder
+
+logging.basicConfig(level=logging.DEBUG)
+LOG = logging.getLogger(__name__)
+
+
+def generate_classifier_explainer_dashboard(
+        exp,
+        estimator,
+        display_format: str = "dash",
+        dashboard_kwargs: Optional[Dict[str, Any]] = None,
+        run_kwargs: Optional[Dict[str, Any]] = None,
+        **kwargs):
+
+    """
+        This function is changed from pycaret.classification.oop.dashboard()
+
+        This function generates the interactive dashboard for a trained model.
+        The dashboard is implemented using
+        ExplainerDashboard (explainerdashboard.readthedocs.io)
+
+
+        estimator: scikit-learn compatible object
+            Trained model object
+
+
+        display_format: str, default = 'dash'
+            Render mode for the dashboard. The default is set to ``dash``
+            which will
+            render a dashboard in browser. There are four possible options:
+
+            - 'dash' - displays the dashboard in browser
+            - 'inline' - displays the dashboard in the jupyter notebook cell.
+            - 'jupyterlab' - displays the dashboard in jupyterlab pane.
+            - 'external' - displays the dashboard in a separate tab.
+                (use in Colab)
+
+
+        dashboard_kwargs: dict, default = {} (empty dict)
+            Dictionary of arguments passed to the ``ExplainerDashboard`` class.
+
+
+        run_kwargs: dict, default = {} (empty dict)
+            Dictionary of arguments passed to the ``run``
+            method of ``ExplainerDashboard``.
+
+
+        **kwargs:
+            Additional keyword arguments to pass to the ``ClassifierExplainer``
+            or ``RegressionExplainer`` class.
+
+
+        Returns:
+            ExplainerDashboard
+    """
+
+    dashboard_kwargs = dashboard_kwargs or {}
+    run_kwargs = run_kwargs or {}
+
+    from explainerdashboard import ClassifierExplainer, ExplainerDashboard
+
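+    # Recover the original class labels from the pipeline's label encoder
+    # so the dashboard shows class names instead of encoded integers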
+    le = get_label_encoder(exp.pipeline)
+    if le:
+        labels_ = list(le.classes_)
+    else:
+        labels_ = None
+
+    # Replace characters that dash doesn't accept in column names:
+    # `.`, `{`, `}`
+
+    X_test_df = exp.X_test_transformed.copy()
+    LOG.info(X_test_df)
+    X_test_df.columns = [
+        col.replace(".", "__").replace("{", "__").replace("}", "__")
+        for col in X_test_df.columns
+    ]
+
+    explainer = ClassifierExplainer(
+        estimator, X_test_df, exp.y_test_transformed, labels=labels_, **kwargs
+    )
+    return ExplainerDashboard(
+        explainer, mode=display_format,
+        contributions=False, whatif=False,
+        **dashboard_kwargs
+    )
+
+
+def generate_regression_explainer_dashboard(
+        exp,
+        estimator,
+        display_format: str = "dash",
+        dashboard_kwargs: Optional[Dict[str, Any]] = None,
+        run_kwargs: Optional[Dict[str, Any]] = None,
+        **kwargs):
+
+    """
+        This function is changed from pycaret.regression.oop.dashboard()
+
+        This function generates the interactive dashboard for a trained model.
+        The dashboard is implemented using ExplainerDashboard
+        (explainerdashboard.readthedocs.io)
+
+
+        estimator: scikit-learn compatible object
+            Trained model object
+
+
+        display_format: str, default = 'dash'
+            Render mode for the dashboard. The default is set to ``dash``
+            which will
+            render a dashboard in browser. There are four possible options:
+
+            - 'dash' - displays the dashboard in browser
+            - 'inline' - displays the dashboard in the jupyter notebook cell.
+            - 'jupyterlab' - displays the dashboard in jupyterlab pane.
+            - 'external' - displays the dashboard in a separate tab.
+            (use in Colab)
+
+
+        dashboard_kwargs: dict, default = {} (empty dict)
+            Dictionary of arguments passed to the ``ExplainerDashboard`` class.
+
+
+        run_kwargs: dict, default = {} (empty dict)
+            Dictionary of arguments passed to the ``run`` method
+            of ``ExplainerDashboard``.
+
+
+        **kwargs:
+            Additional keyword arguments to pass to the
+            ``ClassifierExplainer`` or
+            ``RegressionExplainer`` class.
+
+
+        Returns:
+            ExplainerDashboard
+    """
+
+    dashboard_kwargs = dashboard_kwargs or {}
+    run_kwargs = run_kwargs or {}
+
+    from explainerdashboard import ExplainerDashboard, RegressionExplainer
+
+    # Replace characters that dash doesn't accept in column names:
+    # `.`, `{`, `}`
+    X_test_df = exp.X_test_transformed.copy()
+    X_test_df.columns = [
+        col.replace(".", "__").replace("{", "__").replace("}", "__")
+        for col in X_test_df.columns
+    ]
+    explainer = RegressionExplainer(
+        estimator, X_test_df, exp.y_test_transformed, **kwargs
+    )
+    return ExplainerDashboard(
+        explainer, mode=display_format, contributions=False,
+        whatif=False, shap_interaction=False, decision_trees=False,
+        **dashboard_kwargs
+    )
diff -r 000000000000 -r 1bc26b9636d2 feature_importance.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/feature_importance.py	Wed Dec 11 03:29:00 2024 +0000
@@ -0,0 +1,171 @@
+import base64
+import logging
+import os
+
+import matplotlib.pyplot as plt
+
+import pandas as pd
+
+from pycaret.classification import ClassificationExperiment
+from pycaret.regression import RegressionExperiment
+
+logging.basicConfig(level=logging.DEBUG)
+LOG = logging.getLogger(__name__)
+
+
+class FeatureImportanceAnalyzer:
+    def __init__(
+            self,
+            task_type,
+            output_dir,
+            data_path=None,
+            data=None,
+            target_col=None):
+
+        self.task_type = task_type
+        self.target_col = target_col
+        if data is not None:
+            self.data = data
+            LOG.info("Data loaded from memory")
+        else:
+            self.data = pd.read_csv(data_path, sep=None, engine='python')
+            self.data.columns = self.data.columns.str.replace('.', '_')
+            self.data = self.data.fillna(self.data.median(numeric_only=True))
+        self.target = self.data.columns[int(target_col) - 1]
+        self.exp = ClassificationExperiment() \
+            if task_type == 'classification' \
+            else RegressionExperiment()
+        self.plots = {}
+        self.output_dir = output_dir
+
+    def setup_pycaret(self):
+        LOG.info("Initializing PyCaret")
+        setup_params = {
+            'target': self.target,
+            'session_id': 123,
+            'html': True,
+            'log_experiment': False,
+            'system_log': False
+        }
+        LOG.info(self.task_type)
+        LOG.info(self.exp)
+        self.exp.setup(self.data, **setup_params)
+
+    # def save_coefficients(self):
+    #     model = self.exp.create_model('lr')
+    #     coef_df = pd.DataFrame({
+    #         'Feature': self.data.columns.drop(self.target),
+    #         'Coefficient': model.coef_[0]
+    #     })
+    #     coef_html = coef_df.to_html(index=False)
+    #     return coef_html
+
+    def save_tree_importance(self):
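+        # Fit a Random Forest within the PyCaret experiment and rank the
+        # transformed features by its impurity-based importances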
+        model = self.exp.create_model('rf')
+        importances = model.feature_importances_
+        processed_features = self.exp.get_config('X_transformed').columns
+        LOG.debug(f"Feature importances: {importances}")
+        LOG.debug(f"Features: {processed_features}")
+        feature_importances = pd.DataFrame({
+            'Feature': processed_features,
+            'Importance': importances
+        }).sort_values(by='Importance', ascending=False)
+        plt.figure(figsize=(10, 6))
+        plt.barh(
+            feature_importances['Feature'],
+            feature_importances['Importance'])
+        plt.xlabel('Importance')
+        plt.title('Feature Importance (Random Forest)')
+        plot_path = os.path.join(
+            self.output_dir,
+            'tree_importance.png')
+        plt.savefig(plot_path)
+        plt.close()
+        self.plots['tree_importance'] = plot_path
+
+    def save_shap_values(self):
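+        # Fit a LightGBM model and plot a SHAP summary over the
+        # transformed features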
+        model = self.exp.create_model('lightgbm')
+        import shap
+        explainer = shap.Explainer(model)
+        shap_values = explainer.shap_values(
+            self.exp.get_config('X_transformed'))
+        shap.summary_plot(shap_values,
+                          self.exp.get_config('X_transformed'), show=False)
+        plt.title('Shap (LightGBM)')
+        plot_path = os.path.join(
+            self.output_dir, 'shap_summary.png')
+        plt.savefig(plot_path)
+        plt.close()
+        self.plots['shap_summary'] = plot_path
+
+    def generate_feature_importance(self):
+        # coef_html = self.save_coefficients()
+        self.save_tree_importance()
+        self.save_shap_values()
+
+    def encode_image_to_base64(self, img_path):
+        with open(img_path, 'rb') as img_file:
+            return base64.b64encode(img_file.read()).decode('utf-8')
+
+    def generate_html_report(self):
+        LOG.info("Generating HTML report")
+
+        # Read and encode plot images
+        plots_html = ""
+        for plot_name, plot_path in self.plots.items():
+            encoded_image = self.encode_image_to_base64(plot_path)
+            plots_html += f"""
+            <div class="plot">
+                <h3>{'Feature importance analysis from a '
+                     'trained Random Forest'
+                     if plot_name == 'tree_importance'
+                     else 'SHAP Summary from a trained lightgbm'}</h3>
+                <p>{'Use gini impurity for '
+                    'calculating feature importance for classification '
+                    'and Variance Reduction for regression'
+                    if plot_name == 'tree_importance'
+                    else ''}</p>
+                <img src="data:image/png;base64,{encoded_image}"
+                    alt="{plot_name}">
+            </div>
+            """
+
+        html_content = f"""
+            <h1>PyCaret Feature Importance Report</h1>
+            {plots_html}
+        """
+
+        return html_content
+
+    def run(self):
+        LOG.info("Running feature importance analysis")
+        self.setup_pycaret()
+        self.generate_feature_importance()
+        html_content = self.generate_html_report()
+        LOG.info("Feature importance analysis completed")
+        return html_content
+
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="Feature Importance Analysis")
+    parser.add_argument(
+        "--data_path", type=str, help="Path to the dataset")
+    parser.add_argument(
+        "--target_col", type=int,
+        help="Index of the target column (1-based)")
+    parser.add_argument(
+        "--task_type", type=str,
+        choices=["classification", "regression"],
+        help="Task type: classification or regression")
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        help="Directory to save the outputs")
+    args = parser.parse_args()
+
+    analyzer = FeatureImportanceAnalyzer(
+        task_type=args.task_type,
+        output_dir=args.output_dir,
+        data_path=args.data_path,
+        target_col=args.target_col)
+    analyzer.run()
diff -r 000000000000 -r 1bc26b9636d2 pycaret_classification.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_classification.py	Wed Dec 11 03:29:00 2024 +0000
@@ -0,0 +1,204 @@
+import logging
+
+from base_model_trainer import BaseModelTrainer
+
+from dashboard import generate_classifier_explainer_dashboard
+
+from pycaret.classification import ClassificationExperiment
+
+from utils import add_hr_to_html, add_plot_to_html
+
+LOG = logging.getLogger(__name__)
+
+
+class ClassificationModelTrainer(BaseModelTrainer):
+    def __init__(
+            self,
+            input_file,
+            target_col,
+            output_dir,
+            task_type,
+            random_seed,
+            test_file=None,
+            **kwargs):
+        super().__init__(
+            input_file,
+            target_col,
+            output_dir,
+            task_type,
+            random_seed,
+            test_file,
+            **kwargs)
+        self.exp = ClassificationExperiment()
+
+    def save_dashboard(self):
+        LOG.info("Saving explainer dashboard")
+        dashboard = generate_classifier_explainer_dashboard(self.exp,
+                                                            self.best_model)
+        dashboard.save_html("dashboard.html")
+
+    def generate_plots(self):
+        LOG.info("Generating and saving plots")
+        plots = ['confusion_matrix', 'auc', 'threshold', 'pr',
+                 'error', 'class_report', 'learning', 'calibration',
+                 'vc', 'dimension', 'manifold', 'rfe', 'feature',
+                 'feature_all']
+        for plot_name in plots:
+            try:
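+                # For binary tasks, draw a single ROC curve instead of the
+                # per-class/micro/macro curves PyCaret plots by default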
+                if plot_name == 'auc' and not self.exp.is_multiclass:
+                    plot_path = self.exp.plot_model(self.best_model,
+                                                    plot=plot_name,
+                                                    save=True,
+                                                    plot_kwargs={
+                                                        'micro': False,
+                                                        'macro': False,
+                                                        'per_class': False,
+                                                        'binary': True
+                                                        }
+                                                    )
+                    self.plots[plot_name] = plot_path
+                    continue
+
+                plot_path = self.exp.plot_model(self.best_model,
+                                                plot=plot_name, save=True)
+                self.plots[plot_name] = plot_path
+            except Exception as e:
+                LOG.error(f"Error generating plot {plot_name}: {e}")
+                continue
+
+    def generate_plots_explainer(self):
+        LOG.info("Generating and saving plots from explainer")
+
+        from explainerdashboard import ClassifierExplainer
+
+        X_test = self.exp.X_test_transformed.copy()
+        y_test = self.exp.y_test_transformed
+
+        explainer = ClassifierExplainer(self.best_model, X_test, y_test)
+        self.explainer = explainer
+        plots_explainer_html = ""
+
+        try:
+            fig_importance = explainer.plot_importances()
+            plots_explainer_html += add_plot_to_html(fig_importance)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot importance(mean shap): {e}")
+
+        try:
+            fig_importance_perm = explainer.plot_importances(
+                kind="permutation")
+            plots_explainer_html += add_plot_to_html(fig_importance_perm)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot importance(permutation): {e}")
+
+        # try:
+        #     fig_shap = explainer.plot_shap_summary()
+        #     plots_explainer_html += add_plot_to_html(fig_shap,
+        #       include_plotlyjs=False)
+        # except Exception as e:
+        #     LOG.error(f"Error generating plot shap: {e}")
+
+        # try:
+        #     fig_contributions = explainer.plot_contributions(
+        #       index=0)
+        #     plots_explainer_html += add_plot_to_html(
+        #       fig_contributions, include_plotlyjs=False)
+        # except Exception as e:
+        #     LOG.error(f"Error generating plot contributions: {e}")
+
+        # try:
+        #     for feature in self.features_name:
+        #         fig_dependence = explainer.plot_dependence(col=feature)
+        #         plots_explainer_html += add_plot_to_html(fig_dependence)
+        # except Exception as e:
+        #     LOG.error(f"Error generating plot dependencies: {e}")
+
+        try:
+            for feature in self.features_name:
+                fig_pdp = explainer.plot_pdp(feature)
+                plots_explainer_html += add_plot_to_html(fig_pdp)
+                plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot pdp: {e}")
+
+        try:
+            for feature in self.features_name:
+                fig_interaction = explainer.plot_interaction(
+                    col=feature, interact_col=feature)
+                plots_explainer_html += add_plot_to_html(fig_interaction)
+        except Exception as e:
+            LOG.error(f"Error generating plot interactions: {e}")
+
+        try:
+            for feature in self.features_name:
+                fig_interactions_importance = \
+                    explainer.plot_interactions_importance(
+                        col=feature)
+                plots_explainer_html += add_plot_to_html(
+                    fig_interactions_importance)
+                plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot interactions importance: {e}")
+
+        # try:
+        #     for feature in self.features_name:
+        #         fig_interactions_detailed = \
+        #           explainer.plot_interactions_detailed(
+        #               col=feature)
+        #         plots_explainer_html += add_plot_to_html(
+        #           fig_interactions_detailed)
+        # except Exception as e:
+        #     LOG.error(f"Error generating plot interactions detailed: {e}")
+
+        try:
+            fig_precision = explainer.plot_precision()
+            plots_explainer_html += add_plot_to_html(fig_precision)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot precision: {e}")
+
+        try:
+            fig_cumulative_precision = explainer.plot_cumulative_precision()
+            plots_explainer_html += add_plot_to_html(fig_cumulative_precision)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot cumulative precision: {e}")
+
+        try:
+            fig_classification = explainer.plot_classification()
+            plots_explainer_html += add_plot_to_html(fig_classification)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot classification: {e}")
+
+        try:
+            fig_confusion_matrix = explainer.plot_confusion_matrix()
+            plots_explainer_html += add_plot_to_html(fig_confusion_matrix)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot confusion matrix: {e}")
+
+        try:
+            fig_lift_curve = explainer.plot_lift_curve()
+            plots_explainer_html += add_plot_to_html(fig_lift_curve)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot lift curve: {e}")
+
+        try:
+            fig_roc_auc = explainer.plot_roc_auc()
+            plots_explainer_html += add_plot_to_html(fig_roc_auc)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot roc auc: {e}")
+
+        try:
+            fig_pr_auc = explainer.plot_pr_auc()
+            plots_explainer_html += add_plot_to_html(fig_pr_auc)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot pr auc: {e}")
+
+        self.plots_explainer_html = plots_explainer_html
diff -r 000000000000 -r 1bc26b9636d2 pycaret_macros.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_macros.xml	Wed Dec 11 03:29:00 2024 +0000
@@ -0,0 +1,25 @@
+<macros>
+    <token name="@PYCARET_VERSION@">3.3.2</token>
+    <token name="@SUFFIX@">0</token>
+    <token name="@VERSION@">@PYCARET_VERSION@+@SUFFIX@</token>
+    <token name="@PROFILE@">21.05</token>
+    <xml name="python_requirements">
+        <requirements>
+            <container type="docker">quay.io/goeckslab/galaxy-pycaret:3.3.2</container>
+        </requirements>
+    </xml>
+    <xml name="macro_citations">
+        <citations>
+            <citation type="bibtex">
+                @Manual{PyCaret,
+  author  = {Moez Ali},
+  title   = {PyCaret: An open source, low-code machine learning library in Python},
+  year    = {2020},
+  month   = {April},
+  note    = {PyCaret version 1.0.0},
+  url     = {https://www.pycaret.org}
+}
+            </citation>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
diff -r 000000000000 -r 1bc26b9636d2 pycaret_predict.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_predict.py	Wed Dec 11 03:29:00 2024 +0000
@@ -0,0 +1,200 @@
+import argparse
+import logging
+import os
+import tempfile
+
+import h5py
+
+import joblib
+
+import pandas as pd
+
+from pycaret.classification import ClassificationExperiment
+from pycaret.regression import RegressionExperiment
+
+from sklearn.metrics import average_precision_score
+
+from utils import encode_image_to_base64, get_html_closing, get_html_template
+
+LOG = logging.getLogger(__name__)
+
+
+class PyCaretModelEvaluator:
+    def __init__(self, model_path, task, target):
+        self.model_path = model_path
+        self.task = task.lower()
+        self.model = self.load_h5_model()
+        self.target = target if target != "None" else None
+
+    def load_h5_model(self):
+        """Load a PyCaret model from an HDF5 file."""
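+        # Mirror of BaseModelTrainer.save_model: read the raw joblib bytes
+        # from the 'model' dataset and deserialize them via a temp file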
+        with h5py.File(self.model_path, 'r') as f:
+            model_bytes = bytes(f['model'][()])
+            with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+                temp_file.write(model_bytes)
+                temp_file.flush()
+            loaded_model = joblib.load(temp_file.name)
+            os.remove(temp_file.name)  # clean up the temporary file
+        return loaded_model
+
+    def evaluate(self, data_path):
+        """Evaluate the model using the specified data."""
+        raise NotImplementedError("Subclasses must implement this method")
+
+
+class ClassificationEvaluator(PyCaretModelEvaluator):
+    def evaluate(self, data_path):
+        metrics = None
+        plot_paths = {}
+        data = pd.read_csv(data_path, engine='python', sep=None)
+        if self.target:
+            exp = ClassificationExperiment()
+            names = data.columns.to_list()
+            LOG.info(f"Column names: {names}")
+            target_index = int(self.target)-1
+            target_name = names[target_index]
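+            # Reuse the supplied dataset as PyCaret's test split so that
+            # predict_model/pull score the model on exactly this data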
+            exp.setup(data, target=target_name, test_data=data, index=False)
+            exp.add_metric(id='PR-AUC-Weighted',
+                           name='PR-AUC-Weighted',
+                           target='pred_proba',
+                           score_func=average_precision_score,
+                           average='weighted')
+            predictions = exp.predict_model(self.model)
+            metrics = exp.pull()
+            plots = ['confusion_matrix', 'auc', 'threshold', 'pr',
+                     'error', 'class_report', 'learning', 'calibration',
+                     'vc', 'dimension', 'manifold', 'rfe', 'feature',
+                     'feature_all']
+            for plot_name in plots:
+                try:
+                    if plot_name == 'auc' and not exp.is_multiclass:
+                        plot_path = exp.plot_model(self.model,
+                                                   plot=plot_name,
+                                                   save=True,
+                                                   plot_kwargs={
+                                                       'micro': False,
+                                                       'macro': False,
+                                                       'per_class': False,
+                                                       'binary': True
+                                                    })
+                        plot_paths[plot_name] = plot_path
+                        continue
+
+                    plot_path = exp.plot_model(self.model,
+                                               plot=plot_name, save=True)
+                    plot_paths[plot_name] = plot_path
+                except Exception as e:
+                    LOG.error(f"Error generating plot {plot_name}: {e}")
+                    continue
+            generate_html_report(plot_paths, metrics)
+
+        else:
+            exp = ClassificationExperiment()
+            exp.setup(data, target=None, test_data=data, index=False)
+            predictions = exp.predict_model(self.model, data=data)
+
+        return predictions, metrics, plot_paths
+
+
+class RegressionEvaluator(PyCaretModelEvaluator):
+    def evaluate(self, data_path):
+        metrics = None
+        plot_paths = {}
+        data = pd.read_csv(data_path, engine='python', sep=None)
+        if self.target:
+            names = data.columns.to_list()
+            target_index = int(self.target)-1
+            target_name = names[target_index]
+            exp = RegressionExperiment()
+            exp.setup(data, target=target_name, test_data=data, index=False)
+            predictions = exp.predict_model(self.model)
+            metrics = exp.pull()
+            plots = ['residuals', 'error', 'cooks',
+                     'learning', 'vc', 'manifold',
+                     'rfe', 'feature', 'feature_all']
+            for plot_name in plots:
+                try:
+                    plot_path = exp.plot_model(self.model,
+                                               plot=plot_name, save=True)
+                    plot_paths[plot_name] = plot_path
+                except Exception as e:
+                    LOG.error(f"Error generating plot {plot_name}: {e}")
+                    continue
+            generate_html_report(plot_paths, metrics)
+        else:
+            exp = RegressionExperiment()
+            exp.setup(data, target=None, test_data=data, index=False)
+            predictions = exp.predict_model(self.model, data=data)
+
+        return predictions, metrics, plot_paths
+
+
+def generate_html_report(plots, metrics):
+    """Generate an HTML evaluation report."""
+    plots_html = ""
+    for plot_name, plot_path in plots.items():
+        encoded_image = encode_image_to_base64(plot_path)
+        plots_html += f"""
+        <div class="plot">
+            <h3>{plot_name.capitalize()}</h3>
+            <img src="data:image/png;base64,{encoded_image}"
+                alt="{plot_name}">
+        </div>
+        """
+
+    metrics_html = metrics.to_html(index=False, classes="table")
+
+    html_content = f"""
+    {get_html_template()}
+    <h1>Model Evaluation Report</h1>
+    <div>
+        <h2>Metrics</h2>
+        {metrics_html}
+    </div>
+    <div>
+        <h2>Plots</h2>
+        {plots_html}
+    </div>
+    {get_html_closing()}
+    """
+
+    # Save HTML report
+    with open("evaluation_report.html", "w") as html_file:
+        html_file.write(html_content)
+
+
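+# Example invocation (paths are hypothetical):
+#   python pycaret_predict.py --model_path pycaret_model.h5 \
+#       --data_path auto-mpg.tsv --task regression --target 1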
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Evaluate a PyCaret model stored in HDF5 format.")
+    parser.add_argument("--model_path",
+                        type=str,
+                        help="Path to the HDF5 model file.")
+    parser.add_argument("--data_path",
+                        type=str,
+                        help="Path to the evaluation data CSV file.")
+    parser.add_argument("--task",
+                        type=str,
+                        choices=["classification", "regression"],
+                        help="Specify the task: classification or regression.")
+    parser.add_argument("--target",
+                        default=None,
+                        help="Column number of the target")
+    args = parser.parse_args()
+
+    if args.task == "classification":
+        evaluator = ClassificationEvaluator(
+            args.model_path, args.task, args.target)
+    elif args.task == "regression":
+        evaluator = RegressionEvaluator(
+            args.model_path, args.task, args.target)
+    else:
+        raise ValueError(
+            "Unsupported task type. Use 'classification' or 'regression'.")
+
+    predictions, metrics, plots = evaluator.evaluate(args.data_path)
+
+    predictions.to_csv("predictions.csv", index=False)
diff -r 000000000000 -r 1bc26b9636d2 pycaret_predict.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_predict.xml	Wed Dec 11 03:29:00 2024 +0000
@@ -0,0 +1,61 @@
+<tool id="pycaret_predict" name="PyCaret Predict" version="@VERSION@" profile="@PROFILE@">
+    <description>predicts/evaluates your pycaret ML model on a dataset.</description>
+    <macros>
+        <import>pycaret_macros.xml</import>
+    </macros>
+    <expand macro="python_requirements"/>
+    <!-- Placeholder: the command block, the inputs (h5 model, dataset and
+         the optional target_feature column selector), the outputs
+         (prediction CSV and the HTML report filtered on target_feature),
+         and the tests were stripped from this copy of the patch and are
+         not recoverable. -->
+    <help>
+        This tool uses PyCaret to evaluate a machine learning model or do prediction.
+
+        **Outputs**:
+
+        - **prediction**: The prediction results on the dataset in a csv format.
+
+        - **report**: The evaluation report, generated in HTML format
+          if you upload a dataset with a target column and select the target column in the target_feature input field.
+    </help>
+    <expand macro="macro_citations"/>
+</tool>
\ No newline at end of file
diff -r 000000000000 -r 1bc26b9636d2 pycaret_regression.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_regression.py	Wed Dec 11 03:29:00 2024 +0000
@@ -0,0 +1,134 @@
+import logging
+
+from base_model_trainer import BaseModelTrainer
+
+from dashboard import generate_regression_explainer_dashboard
+
+from pycaret.regression import RegressionExperiment
+
+from utils import add_hr_to_html, add_plot_to_html
+
+LOG = logging.getLogger(__name__)
+
+
+class RegressionModelTrainer(BaseModelTrainer):
+    def __init__(
+            self,
+            input_file,
+            target_col,
+            output_dir,
+            task_type,
+            random_seed,
+            test_file=None,
+            **kwargs):
+        super().__init__(
+            input_file,
+            target_col,
+            output_dir,
+            task_type,
+            random_seed,
+            test_file,
+            **kwargs)
+        self.exp = RegressionExperiment()
+
+    def save_dashboard(self):
+        LOG.info("Saving explainer dashboard")
+        dashboard = generate_regression_explainer_dashboard(self.exp,
+                                                            self.best_model)
+        dashboard.save_html("dashboard.html")
+
+    def generate_plots(self):
+        LOG.info("Generating and saving plots")
+        plots = ['residuals', 'error', 'cooks',
+                 'learning', 'vc', 'manifold',
+                 'rfe', 'feature', 'feature_all']
+        for plot_name in plots:
+            try:
+                plot_path = self.exp.plot_model(self.best_model,
+                                                plot=plot_name, save=True)
+                self.plots[plot_name] = plot_path
+            except Exception as e:
+                LOG.error(f"Error generating plot {plot_name}: {e}")
+                continue
+
+    def generate_plots_explainer(self):
+        LOG.info("Generating and saving plots from explainer")
+
+        from explainerdashboard import RegressionExplainer
+
+        X_test = self.exp.X_test_transformed.copy()
+        y_test = self.exp.y_test_transformed
+
+        explainer = RegressionExplainer(self.best_model, X_test, y_test)
+        self.explainer = explainer
+        plots_explainer_html = ""
+
+        try:
+            fig_importance = explainer.plot_importances()
+            plots_explainer_html += add_plot_to_html(fig_importance)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot importance: {e}")
+
+        try:
+            fig_importance_permutation = explainer.plot_importances(
+                kind="permutation")
+            plots_explainer_html += add_plot_to_html(
+                fig_importance_permutation)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot importance permutation: {e}")
+
+        try:
+            for feature in self.features_name:
+                fig_pdp = explainer.plot_pdp(feature)
+                plots_explainer_html += add_plot_to_html(fig_pdp)
+                plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot pdp: {e}")
+
+        # try:
+        #     for feature in self.features_name:
+        #         fig_interaction = explainer.plot_interaction(col=feature)
+        #         plots_explainer_html += add_plot_to_html(fig_interaction)
+        # except Exception as e:
+        #     LOG.error(f"Error generating plot shap interaction: {e}")
+
+        try:
+            for feature in self.features_name:
+                fig_interactions_importance = \
+                    explainer.plot_interactions_importance(
+                        col=feature)
+                plots_explainer_html += add_plot_to_html(
+                    fig_interactions_importance)
+                plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot interactions importance: {e}")
+
+        # Regression specific plots
+        try:
+            fig_pred_actual = explainer.plot_predicted_vs_actual()
+            plots_explainer_html += add_plot_to_html(fig_pred_actual)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot prediction vs actual: {e}")
+
+        try:
+            fig_residuals = explainer.plot_residuals()
+            plots_explainer_html += add_plot_to_html(fig_residuals)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot residuals: {e}")
+
+        try:
+            for feature in self.features_name:
+                fig_residuals_vs_feature = \
+                    explainer.plot_residuals_vs_feature(feature)
+                plots_explainer_html += add_plot_to_html(
+                    fig_residuals_vs_feature)
+                plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot residuals vs feature: {e}")
+
+        self.plots_explainer_html = plots_explainer_html
diff -r 000000000000 -r 1bc26b9636d2 pycaret_train.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_train.py	Wed Dec 11 03:29:00 2024 +0000
@@ -0,0 +1,117 @@
+import argparse
+import logging
+
+from pycaret_classification import ClassificationModelTrainer
+
+from pycaret_regression import RegressionModelTrainer
+
+logging.basicConfig(level=logging.DEBUG)
+LOG = logging.getLogger(__name__)
+
+
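+# Example invocation (paths are hypothetical):
+#   python pycaret_train.py --input_file auto-mpg.tsv --target_col 1 \
+#       --output_dir outputs --model_type regression --random_seed 42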
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input_file", help="Path to the input file")
+    parser.add_argument("--target_col", help="Column number of the target")
+    parser.add_argument("--output_dir",
+                        help="Path to the output directory")
+    parser.add_argument("--model_type",
+                        choices=["classification", "regression"],
+                        help="Type of the model")
+    parser.add_argument("--train_size", type=float,
+                        default=None,
+                        help="Train size for PyCaret setup")
+    parser.add_argument("--normalize", action="store_true",
+                        default=None,
+                        help="Normalize data for PyCaret setup")
+    parser.add_argument("--feature_selection", action="store_true",
+                        default=None,
+                        help="Perform feature selection for PyCaret setup")
+    parser.add_argument("--cross_validation", action="store_true",
+                        default=None,
+                        help="Perform cross-validation for PyCaret setup")
+    parser.add_argument("--cross_validation_folds", type=int,
+                        default=None,
+                        help="Number of cross-validation folds "
+                             "for PyCaret setup")
+    parser.add_argument("--remove_outliers", action="store_true",
+                        default=None,
+                        help="Remove outliers for PyCaret setup")
+    parser.add_argument("--remove_multicollinearity", action="store_true",
+                        default=None,
+                        help="Remove multicollinearity for PyCaret setup")
+    parser.add_argument("--polynomial_features", action="store_true",
+                        default=None,
+                        help="Generate polynomial features for PyCaret setup")
+    parser.add_argument("--feature_interaction", action="store_true",
+                        default=None,
+                        help="Generate feature interactions for PyCaret setup")
+    parser.add_argument("--feature_ratio", action="store_true",
+                        default=None,
+                        help="Generate feature ratios for PyCaret setup")
+    parser.add_argument("--fix_imbalance", action="store_true",
+                        default=None,
+                        help="Fix class imbalance for PyCaret setup")
+    parser.add_argument("--models", nargs='+',
+                        default=None,
+                        help="Selected models for training")
+    parser.add_argument("--random_seed", type=int,
+                        default=42,
+                        help="Random seed for PyCaret setup")
+    parser.add_argument("--test_file", type=str, default=None,
+                        help="Path to the test data file")
+
+    args = parser.parse_args()
+
+    model_kwargs = {
+        "train_size": args.train_size,
+        "normalize": args.normalize,
+        "feature_selection": args.feature_selection,
+        "cross_validation": args.cross_validation,
+        "cross_validation_folds": args.cross_validation_folds,
+        "remove_outliers": args.remove_outliers,
+        "remove_multicollinearity": args.remove_multicollinearity,
+        "polynomial_features": args.polynomial_features,
+        "feature_interaction": args.feature_interaction,
+        "feature_ratio": args.feature_ratio,
+        "fix_imbalance": args.fix_imbalance,
+    }
+    LOG.info(f"Model kwargs: {model_kwargs}")
+
+    if args.models:
+        model_kwargs["models"] = args.models[0].split(",")
+
+    # Remove None values so PyCaret's own defaults apply
+    model_kwargs = {k: v for k, v in model_kwargs.items() if v is not None}
+
+    if args.model_type == "classification":
+        trainer = ClassificationModelTrainer(
+            args.input_file,
+            args.target_col,
+            args.output_dir,
+            args.model_type,
+            args.random_seed,
+            args.test_file,
+            **model_kwargs)
+    elif args.model_type == "regression":
+        if "fix_imbalance" in model_kwargs:
+            del model_kwargs["fix_imbalance"]
+        trainer = RegressionModelTrainer(
+            args.input_file,
+            args.target_col,
+            args.output_dir,
+            args.model_type,
+            args.random_seed,
+            args.test_file,
+            **model_kwargs)
+    else:
+        LOG.error("Invalid model type. Please choose "
+                  "'classification' or 'regression'.")
+        return
+    trainer.run()
+
+
+if __name__ == "__main__":
+    main()
diff -r 000000000000 -r 1bc26b9636d2 pycaret_train.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_train.xml	Wed Dec 11 03:29:00 2024 +0000
@@ -0,0 +1,209 @@
+<tool id="pycaret_train" name="PyCaret Model Comparison" version="@VERSION@" profile="@PROFILE@">
+    <description>compares different machine learning models on a dataset using PyCaret and does feature analyses using Random Forest and LightGBM.</description>
+    <macros>
+        <import>pycaret_macros.xml</import>
+    </macros>
+    <expand macro="python_requirements"/>
+    <!-- Placeholder: the command block, the inputs (training/test datasets,
+         target column selector, classification/regression model choices and
+         the advanced PyCaret setup switches), the outputs (h5 model and
+         HTML comparison report), and the tests were stripped from this copy
+         of the patch and are not recoverable. -->
+    <help>
+        This tool uses PyCaret to train and evaluate machine learning models.
+        It compares different models on a dataset and provides the best model based on the performance metrics.
+
+        **Outputs**
+
+        - **Model**: The best model trained on the dataset in h5 format.
+
+        - **Comparison Result**: The comparison result of different models in html format.
+          It contains the performance metrics of different models, plots of the best model
+          on the testing set (or part of the training set if a separate test set is not uploaded), and feature analysis plots.
+    </help>
+    <expand macro="macro_citations"/>
+</tool>
\ No newline at end of file
diff -r 000000000000 -r 1bc26b9636d2 test-data/auto-mpg.tsv
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/auto-mpg.tsv	Wed Dec 11 03:29:00 2024 +0000
@@ -0,0 +1,399 @@
+MPG	Cylinders	Displacement	Horsepower	Weight	Acceleration	ModelYear	Origin
+18.0	8	307.0	130.0	3504.	12.0	70	1
+15.0	8	350.0	165.0	3693.	11.5	70	1
+18.0	8	318.0	150.0	3436.	11.0	70	1
+16.0	8	304.0	150.0	3433.	12.0	70	1
+17.0	8	302.0	140.0	3449.	10.5	70	1
+15.0	8	429.0	198.0	4341.	10.0	70	1
+14.0	8	454.0	220.0	4354.	9.0	70	1
+14.0	8	440.0	215.0	4312.	8.5	70	1
+14.0	8	455.0	225.0	4425.	10.0	70	1
+15.0	8	390.0	190.0	3850.	8.5	70	1
+15.0	8	383.0	170.0	3563.	10.0	70	1
+14.0	8	340.0	160.0	3609.	8.0	70	1
+15.0	8	400.0	150.0	3761.	9.5	70	1
+14.0	8	455.0	225.0	3086.	10.0	70	1
+24.0	4	113.0	95.00	2372.	15.0	70	3
+22.0	6	198.0	95.00	2833.	15.5	70	1
+18.0	6	199.0	97.00	2774.	15.5	70	1
+21.0	6	200.0	85.00	2587.	16.0	70	1
+27.0	4	97.00	88.00	2130.	14.5	70	3
+26.0	4	97.00	46.00	1835.	20.5	70	2
+25.0	4	110.0	87.00	2672.	17.5	70	2
+24.0	4	107.0	90.00	2430.	14.5	70	2
+25.0	4	104.0	95.00	2375.	17.5	70	2
+26.0	4	121.0	113.0	2234.	12.5	70	2
+21.0	6	199.0	90.00	2648.	15.0	70	1
+10.0	8	360.0	215.0	4615.	14.0	70	1
+10.0	8	307.0	200.0	4376.	15.0	70	1
+11.0	8	318.0	210.0	4382.	13.5	70	1
+9.0	8	304.0	193.0	4732.	18.5	70	1
+27.0	4	97.00	88.00	2130.	14.5	71	3
+28.0	4	140.0	90.00	2264.	15.5	71	1
+25.0	4	113.0	95.00	2228.	14.0	71	3
+25.0	4	98.00	?	2046.	19.0	71	1
+19.0	6	232.0	100.0	2634.	13.0	71	1
+16.0	6	225.0	105.0	3439.	15.5	71	1
+17.0	6	250.0	100.0	3329.	15.5	71	1
+19.0	6	250.0	88.00	3302.	15.5	71	1
+18.0	6	232.0	100.0	3288.	15.5	71	1
+14.0	8	350.0	165.0	4209.	12.0	71	1
+14.0	8	400.0	175.0	4464.	11.5	71	1
+14.0	8	351.0	153.0	4154.	13.5	71	1
+14.0	8	318.0	150.0	4096.	13.0	71	1
+12.0	8	383.0	180.0	4955.	11.5	71	1
+13.0	8	400.0	170.0	4746.	12.0	71	1
+13.0	8	400.0	175.0	5140.	12.0	71	1
+18.0	6	258.0	110.0	2962.	13.5	71	1
+22.0	4	140.0	72.00	2408.	19.0	71	1
+19.0	6	250.0	100.0	3282.	15.0	71	1
+18.0	6	250.0	88.00	3139.	14.5	71	1
+23.0	4	122.0	86.00	2220.	14.0	71	1
+28.0	4	116.0	90.00	2123.	14.0	71	2
+30.0	4	79.00	70.00	2074.	19.5	71	2
+30.0	4	88.00	76.00	2065.	14.5	71	2
+31.0	4	71.00	65.00	1773.	19.0	71	3
+35.0	4	72.00	69.00	1613.	18.0	71	3
+27.0	4	97.00	60.00	1834.	19.0	71	2
+26.0	4	91.00	70.00	1955.	20.5	71	1
+24.0	4	113.0	95.00	2278.	15.5	72	3
+25.0	4	97.50	80.00	2126.	17.0	72	1
+23.0	4	97.00	54.00	2254.	23.5	72	2
+20.0	4	140.0	90.00	2408.	19.5	72	1
+21.0	4	122.0	86.00	2226.	16.5	72	1
+13.0	8	350.0	165.0	4274.	12.0	72	1
+14.0	8	400.0	175.0	4385.	12.0	72	1
+15.0	8	318.0	150.0	4135.	13.5	72	1
+14.0	8	351.0	153.0	4129.	13.0	72	1
+17.0	8	304.0	150.0	3672.	11.5	72	1
+11.0	8	429.0	208.0	4633.	11.0	72	1
+13.0	8	350.0	155.0	4502.	13.5	72	1
+12.0	8	350.0	160.0	4456.	13.5	72	1
+13.0	8	400.0	190.0	4422.	12.5	72	1
+19.0	3	70.00	97.00	2330.	13.5	72	3
+15.0	8	304.0	150.0	3892.	12.5	72	1
+13.0	8	307.0	130.0	4098.	14.0	72	1
+13.0	8	302.0	140.0	4294.	16.0	72	1
+14.0	8	318.0	150.0	4077.	14.0	72	1
+18.0	4	121.0	112.0	2933.	14.5	72	2
+22.0	4	121.0	76.00	2511.	18.0	72	2
+21.0	4	120.0	87.00	2979.	19.5	72	2
+26.0	4	96.00	69.00	2189.	18.0	72	2
+22.0	4	122.0	86.00	2395.	16.0	72	1
+28.0	4	97.00	92.00	2288.	17.0	72	3
+23.0	4	120.0	97.00	2506.	14.5	72	3
+28.0	4	98.00	80.00	2164.	15.0	72	1
+27.0	4	97.00	88.00	2100.	16.5	72	3
+13.0	8	350.0	175.0	4100.	13.0	73	1
+14.0	8	304.0	150.0	3672.	11.5	73	1
+13.0	8	350.0	145.0	3988.	13.0	73	1
+14.0	8	302.0	137.0	4042.	14.5	73	1
+15.0	8	318.0	150.0	3777.	12.5	73	1
+12.0	8	429.0	198.0	4952.	11.5	73	1
+13.0	8	400.0	150.0	4464.	12.0	73	1
+13.0	8	351.0	158.0	4363.	13.0	73	1
+14.0	8	318.0	150.0	4237.	14.5	73	1
+13.0	8	440.0	215.0	4735.	11.0	73	1
+12.0	8	455.0	225.0	4951.	11.0	73	1
+13.0	8	360.0	175.0	3821.	11.0	73	1
+18.0	6	225.0	105.0	3121.	16.5	73	1
+16.0	6	250.0	100.0	3278.	18.0	73	1
+18.0	6	232.0	100.0	2945.	16.0	73	1
+18.0	6	250.0	88.00	3021.	16.5	73	1
+23.0	6	198.0	95.00	2904.	16.0	73	1
+26.0	4	97.00	46.00	1950.	21.0	73	2
+11.0	8	400.0	150.0	4997.	14.0	73	1
+12.0	8	400.0	167.0	4906.	12.5	73	1
+13.0	8	360.0	170.0	4654.	13.0	73	1
+12.0	8	350.0	180.0	4499.	12.5	73	1
+18.0	6	232.0	100.0	2789.	15.0	73	1
+20.0	4	97.00	88.00	2279.	19.0	73	3
+21.0	4	140.0	72.00	2401.	19.5	73	1
+22.0	4	108.0	94.00	2379.	16.5	73	3
+18.0	3	70.00	90.00	2124.	13.5	73	3
+19.0	4	122.0	85.00	2310.	18.5	73	1
+21.0	6	155.0	107.0	2472.	14.0	73	1
+26.0	4	98.00	90.00	2265.	15.5	73	2
+15.0	8	350.0	145.0	4082.	13.0	73	1
+16.0	8	400.0	230.0	4278.	9.50	73	1
+29.0	4	68.00	49.00	1867.	19.5	73	2
+24.0	4	116.0	75.00	2158.	15.5	73	2
+20.0	4	114.0	91.00	2582.	14.0	73	2
+19.0	4	121.0	112.0	2868.	15.5	73	2
+15.0	8	318.0	150.0	3399.	11.0	73	1
+24.0	4	121.0	110.0	2660.	14.0	73	2
+20.0	6	156.0	122.0	2807.	13.5	73	3
+11.0	8	350.0	180.0	3664.	11.0	73	1
+20.0	6	198.0	95.00	3102.	16.5	74	1
+21.0	6	200.0	?	2875.	17.0	74	1
+19.0	6	232.0	100.0	2901.	16.0	74	1
+15.0	6	250.0	100.0	3336.	17.0	74	1
+31.0	4	79.00	67.00	1950.	19.0	74	3
+26.0	4	122.0	80.00	2451.	16.5	74	1
+32.0	4	71.00	65.00	1836.	21.0	74	3
+25.0	4	140.0	75.00	2542.	17.0	74	1
+16.0	6	250.0	100.0	3781.	17.0	74	1
+16.0	6	258.0	110.0	3632.	18.0	74	1
+18.0	6	225.0	105.0	3613.	16.5	74	1
+16.0	8	302.0	140.0	4141.	14.0	74	1
+13.0	8	350.0	150.0	4699.	14.5	74	1
+14.0	8	318.0	150.0	4457.	13.5	74	1
+14.0	8	302.0	140.0	4638.	16.0	74	1
+14.0	8	304.0	150.0	4257.	15.5	74	1
+29.0	4	98.00	83.00	2219.	16.5	74	2
+26.0	4	79.00	67.00	1963.	15.5	74	2
+26.0	4	97.00	78.00	2300.	14.5	74	2
+31.0	4	76.00	52.00	1649.	16.5	74	3
+32.0	4	83.00	61.00	2003.	19.0	74	3
+28.0	4	90.00	75.00	2125.	14.5	74	1
+24.0	4	90.00	75.00	2108.	15.5	74	2
+26.0	4	116.0	75.00	2246.	14.0	74	2
+24.0	4	120.0	97.00	2489.	15.0	74	3
+26.0	4	108.0	93.00	2391.	15.5	74	3
+31.0	4	79.00	67.00	2000.	16.0	74	2
+19.0	6	225.0	95.00	3264.	16.0	75	1
+18.0	6	250.0	105.0	3459.	16.0	75	1
+15.0	6	250.0	72.00	3432.	21.0	75	1
+15.0	6	250.0	72.00	3158.	19.5	75	1
+16.0	8	400.0	170.0	4668.	11.5	75	1
+15.0	8	350.0	145.0	4440.	14.0	75	1
+16.0	8	318.0	150.0	4498.	14.5	75	1
+14.0	8	351.0	148.0	4657.	13.5	75	1
+17.0	6	231.0	110.0	3907.	21.0	75	1
+16.0	6	250.0	105.0	3897.	18.5	75	1
+15.0	6	258.0	110.0	3730.	19.0	75	1
+18.0	6	225.0	95.00	3785.	19.0	75	1
+21.0	6	231.0	110.0	3039.	15.0	75	1
+20.0	8	262.0	110.0	3221.	13.5	75	1
+13.0	8	302.0	129.0	3169.	12.0	75	1
+29.0	4	97.00	75.00	2171.	16.0	75	3
+23.0	4	140.0	83.00	2639.	17.0	75	1
+20.0	6	232.0	100.0	2914.	16.0	75	1
+23.0	4	140.0	78.00	2592.	18.5	75	1
+24.0	4	134.0	96.00	2702.	13.5	75	3
+25.0	4	90.00	71.00	2223.	16.5	75	2
+24.0	4	119.0	97.00	2545.	17.0	75	3
+18.0	6	171.0	97.00	2984.	14.5	75	1
+29.0	4	90.00	70.00	1937.	14.0	75	2
+19.0	6	232.0	90.00	3211.	17.0	75	1
+23.0	4	115.0	95.00	2694.	15.0	75	2
+23.0	4	120.0	88.00	2957.	17.0	75	2
+22.0	4	121.0	98.00	2945.	14.5	75	2
+25.0	4	121.0	115.0	2671.	13.5	75	2
+33.0	4	91.00	53.00	1795.	17.5	75	3
+28.0	4	107.0	86.00	2464.	15.5	76	2
+25.0	4	116.0	81.00	2220.	16.9	76	2
+25.0	4	140.0	92.00	2572.	14.9	76	1
+26.0	4	98.00	79.00	2255.	17.7	76	1
+27.0	4	101.0	83.00	2202.	15.3	76	2
+17.5	8	305.0	140.0	4215.	13.0	76	1
+16.0	8	318.0	150.0	4190.	13.0	76	1
+15.5	8	304.0	120.0	3962.	13.9	76	1
+14.5	8	351.0	152.0	4215.	12.8	76	1
+22.0	6	225.0	100.0	3233.	15.4	76	1
+22.0	6	250.0	105.0	3353.	14.5	76	1
+24.0	6	200.0	81.00	3012.	17.6	76	1
+22.5	6	232.0	90.00	3085.	17.6	76	1
+29.0	4	85.00	52.00	2035.	22.2	76	1
+24.5	4	98.00	60.00	2164.	22.1	76	1
+29.0	4	90.00	70.00	1937.	14.2	76	2
+33.0	4	91.00	53.00	1795.	17.4	76	3
+20.0	6	225.0	100.0	3651.	17.7	76	1
+18.0	6	250.0	78.00	3574.	21.0	76	1
+18.5	6	250.0	110.0	3645.	16.2	76	1
+17.5	6	258.0	95.00	3193.	17.8	76	1
+29.5	4	97.00	71.00	1825.	12.2	76	2
+32.0	4	85.00	70.00	1990.	17.0	76	3
+28.0	4	97.00	75.00	2155.	16.4	76	3
+26.5	4	140.0	72.00	2565.	13.6	76	1
+20.0	4	130.0	102.0	3150.	15.7	76	2
+13.0	8	318.0	150.0	3940.	13.2	76	1
+19.0	4	120.0	88.00	3270.	21.9	76	2
+19.0	6	156.0	108.0	2930.	15.5	76	3
+16.5	6	168.0	120.0	3820.	16.7	76	2
+16.5	8	350.0	180.0	4380.	12.1	76	1
+13.0	8	350.0	145.0	4055.	12.0	76	1
+13.0	8	302.0	130.0	3870.	15.0	76	1
+13.0	8	318.0	150.0	3755.	14.0	76	1
+31.5	4	98.00	68.00	2045.	18.5	77	3
+30.0	4	111.0	80.00	2155.	14.8	77	1
+36.0	4	79.00	58.00	1825.	18.6	77	2
+25.5	4	122.0	96.00	2300.	15.5	77	1
+33.5	4	85.00	70.00	1945.	16.8	77	3
+17.5	8	305.0	145.0	3880.	12.5	77	1
+17.0	8	260.0	110.0	4060.	19.0	77	1
+15.5	8	318.0	145.0	4140.	13.7	77	1
+15.0	8	302.0	130.0	4295.	14.9	77	1
+17.5	6	250.0	110.0	3520.	16.4	77	1
+20.5	6	231.0	105.0	3425.	16.9	77	1
+19.0	6	225.0	100.0	3630.	17.7	77	1
+18.5	6	250.0	98.00	3525.	19.0	77	1
+16.0	8	400.0	180.0	4220.	11.1	77	1
+15.5	8	350.0	170.0	4165.	11.4	77	1
+15.5	8	400.0	190.0	4325.	12.2	77	1
+16.0	8	351.0	149.0	4335.	14.5	77	1
+29.0	4	97.00	78.00	1940.	14.5	77	2
+24.5	4	151.0	88.00	2740.	16.0	77	1
+26.0	4	97.00	75.00	2265.	18.2	77	3
+25.5	4	140.0	89.00	2755.	15.8	77	1
+30.5	4	98.00	63.00	2051.	17.0	77	1
+33.5	4	98.00	83.00	2075.	15.9	77	1
+30.0	4	97.00	67.00	1985.	16.4	77	3
+30.5	4	97.00	78.00	2190.	14.1	77	2
+22.0	6	146.0	97.00	2815.	14.5	77	3
+21.5	4	121.0	110.0	2600.	12.8	77	2
+21.5	3	80.00	110.0	2720.	13.5	77	3
+43.1	4	90.00	48.00	1985.	21.5	78	2
+36.1	4	98.00	66.00	1800.	14.4	78	1
+32.8	4	78.00	52.00	1985.	19.4	78	3
+39.4	4	85.00	70.00	2070.	18.6	78	3
+36.1	4	91.00	60.00	1800.	16.4	78	3
+19.9	8	260.0	110.0	3365.	15.5	78	1
+19.4	8	318.0	140.0	3735.	13.2	78	1
+20.2	8	302.0	139.0	3570.	12.8	78	1
+19.2	6	231.0	105.0	3535.	19.2	78	1
+20.5	6	200.0	95.00	3155.	18.2	78	1
+20.2	6	200.0	85.00	2965.	15.8	78	1
+25.1	4	140.0	88.00	2720.	15.4	78	1
+20.5	6	225.0	100.0	3430.	17.2	78	1
+19.4	6	232.0	90.00	3210.	17.2	78	1
+20.6	6	231.0	105.0	3380.	15.8	78	1
+20.8	6	200.0	85.00	3070.	16.7	78	1
+18.6	6	225.0	110.0	3620.	18.7	78	1
+18.1	6	258.0	120.0	3410.	15.1	78	1
+19.2	8	305.0	145.0	3425.	13.2	78	1
+17.7	6	231.0	165.0	3445.	13.4	78	1
+18.1	8	302.0	139.0	3205.	11.2	78	1
+17.5	8	318.0	140.0	4080.	13.7	78	1
+30.0	4	98.00	68.00	2155.	16.5	78	1
+27.5	4	134.0	95.00	2560.	14.2	78	3
+27.2	4	119.0	97.00	2300.	14.7	78	3
+30.9	4	105.0	75.00	2230.	14.5	78	1
+21.1	4	134.0	95.00	2515.	14.8	78	3
+23.2	4	156.0	105.0	2745.	16.7	78	1
+23.8	4	151.0	85.00	2855.	17.6	78	1
+23.9	4	119.0	97.00	2405.	14.9	78	3
+20.3	5	131.0	103.0	2830.	15.9	78	2
+17.0	6	163.0	125.0	3140.	13.6	78	2
+21.6	4	121.0	115.0	2795.	15.7	78	2
+16.2	6	163.0	133.0	3410.	15.8	78	2
+31.5	4	89.00	71.00	1990.	14.9	78	2
+29.5	4	98.00	68.00	2135.	16.6	78	3
+21.5	6	231.0	115.0	3245.	15.4	79	1
+19.8	6	200.0	85.00	2990.	18.2	79	1
+22.3	4	140.0	88.00	2890.	17.3	79	1
+20.2	6	232.0	90.00	3265.	18.2	79	1
+20.6	6	225.0	110.0	3360.	16.6	79	1
+17.0	8	305.0	130.0	3840.	15.4	79	1
+17.6	8	302.0	129.0	3725.	13.4	79	1
+16.5	8	351.0	138.0	3955.	13.2	79	1
+18.2	8	318.0	135.0	3830.	15.2	79	1
+16.9	8	350.0	155.0	4360.	14.9	79	1
+15.5	8	351.0	142.0	4054.	14.3	79	1
+19.2	8	267.0	125.0	3605.	15.0	79	1
+18.5	8	360.0	150.0	3940.	13.0	79	1
+31.9	4	89.00	71.00	1925.	14.0	79	2
+34.1	4	86.00	65.00	1975.	15.2	79	3
+35.7	4	98.00	80.00	1915.	14.4	79	1
+27.4	4	121.0	80.00	2670.	15.0	79	1
+25.4	5	183.0	77.00	3530.	20.1	79	2
+23.0	8	350.0	125.0	3900.	17.4	79	1
+27.2	4	141.0	71.00	3190.	24.8	79	2
+23.9	8	260.0	90.00	3420.	22.2	79	1
+34.2	4	105.0	70.00	2200.	13.2	79	1
+34.5	4	105.0	70.00	2150.	14.9	79	1
+31.8	4	85.00	65.00	2020.	19.2	79	3
+37.3	4	91.00	69.00	2130.	14.7	79	2
+28.4	4	151.0	90.00	2670.	16.0	79	1
+28.8	6	173.0	115.0	2595.	11.3	79	1
+26.8	6	173.0	115.0	2700.	12.9	79	1
+33.5	4	151.0	90.00	2556.	13.2	79	1
+41.5	4	98.00	76.00	2144.	14.7	80	2
+38.1	4	89.00	60.00	1968.	18.8	80	3
+32.1	4	98.00	70.00	2120.	15.5	80	1
+37.2	4	86.00	65.00	2019.	16.4	80	3
+28.0	4	151.0	90.00	2678.	16.5	80	1
+26.4	4	140.0	88.00	2870.	18.1	80	1
+24.3	4	151.0	90.00	3003.	20.1	80	1
+19.1	6	225.0	90.00	3381.	18.7	80	1
+34.3	4	97.00	78.00	2188.	15.8	80	2
+29.8	4	134.0	90.00	2711.	15.5	80	3
+31.3	4	120.0	75.00	2542.	17.5	80	3
+37.0	4	119.0	92.00	2434.	15.0	80	3
+32.2	4	108.0	75.00	2265.	15.2	80	3
+46.6	4	86.00	65.00	2110.	17.9	80	3
+27.9	4	156.0	105.0	2800.	14.4	80	1
+40.8	4	85.00	65.00	2110.	19.2	80	3
+44.3	4	90.00	48.00	2085.	21.7	80	2
+43.4	4	90.00	48.00	2335.	23.7	80	2
+36.4	5	121.0	67.00	2950.	19.9	80	2
+30.0	4	146.0	67.00	3250.	21.8	80	2
+44.6	4	91.00	67.00	1850.	13.8	80	3
+40.9	4	85.00	?	1835.	17.3	80	2
+33.8	4	97.00	67.00	2145.	18.0	80	3
+29.8	4	89.00	62.00	1845.	15.3	80	2
+32.7	6	168.0	132.0	2910.	11.4	80	3
+23.7	3	70.00	100.0	2420.	12.5	80	3
+35.0	4	122.0	88.00	2500.	15.1	80	2
+23.6	4	140.0	?	2905.	14.3	80	1
+32.4	4	107.0	72.00	2290.	17.0	80	3
+27.2	4	135.0	84.00	2490.	15.7	81	1
+26.6	4	151.0	84.00	2635.	16.4	81	1
+25.8	4	156.0	92.00	2620.	14.4	81	1
+23.5	6	173.0	110.0	2725.	12.6	81	1
+30.0	4	135.0	84.00	2385.	12.9	81	1
+39.1	4	79.00	58.00	1755.	16.9	81	3
+39.0	4	86.00	64.00	1875.	16.4	81	1
+35.1	4	81.00	60.00	1760.	16.1	81	3
+32.3	4	97.00	67.00	2065.	17.8	81	3
+37.0	4	85.00	65.00	1975.	19.4	81	3
+37.7	4	89.00	62.00	2050.	17.3	81	3
+34.1	4	91.00	68.00	1985.	16.0	81	3
+34.7	4	105.0	63.00	2215.	14.9	81	1
+34.4	4	98.00	65.00	2045.	16.2	81	1
+29.9	4	98.00	65.00	2380.	20.7	81	1
+33.0	4	105.0	74.00	2190.	14.2	81	2
+34.5	4	100.0	?	2320.	15.8	81	2
+33.7	4	107.0	75.00	2210.	14.4	81	3
+32.4	4	108.0	75.00	2350.	16.8	81	3
+32.9	4	119.0	100.0	2615.	14.8	81	3
+31.6	4	120.0	74.00	2635.	18.3	81	3
+28.1	4	141.0	80.00	3230.	20.4	81	2
+30.7	6	145.0	76.00	3160.	19.6	81	2
+25.4	6	168.0	116.0	2900.	12.6	81	3
+24.2	6	146.0	120.0	2930.	13.8	81	3
+22.4	6	231.0	110.0	3415.	15.8	81	1
+26.6	8	350.0	105.0	3725.	19.0	81	1
+20.2	6	200.0	88.00	3060.	17.1	81	1
+17.6	6	225.0	85.00	3465.	16.6	81	1
+28.0	4	112.0	88.00	2605.	19.6	82	1
+27.0	4	112.0	88.00	2640.	18.6	82	1
+34.0	4	112.0	88.00	2395.	18.0	82	1
+31.0	4	112.0	85.00	2575.	16.2	82	1
+29.0	4	135.0	84.00	2525.	16.0	82	1
+27.0	4	151.0	90.00	2735.	18.0	82	1
+24.0	4	140.0	92.00	2865.	16.4	82	1
+23.0	4	151.0	?	3035.	20.5	82	1
+36.0	4	105.0	74.00	1980.	15.3	82	2
+37.0	4	91.00	68.00	2025.	18.2	82	3
+31.0	4	91.00	68.00	1970.	17.6	82	3
+38.0	4	105.0	63.00	2125.	14.7	82	1
+36.0	4	98.00	70.00	2125.	17.3	82	1
+36.0	4	120.0	88.00	2160.	14.5	82	3
+36.0	4	107.0	75.00	2205.	14.5	82	3
+34.0	4	108.0	70.00	2245	16.9	82	3
+38.0	4	91.00	67.00	1965.	15.0	82	3
+32.0	4	91.00	67.00	1965.	15.7	82	3
+38.0	4	91.00	67.00	1995.	16.2	82	3
+25.0	6	181.0	110.0	2945.	16.4	82	1
+38.0	6	262.0	85.00	3015.	17.0	82	1
+26.0	4	156.0	92.00	2585.	14.5	82	1
+22.0	6	232.0	112.0	2835	14.7	82	1
+32.0	4	144.0	96.00	2665.	13.9	82	3
+36.0	4	135.0	84.00	2370.	13.0	82	1
+27.0	4	151.0	90.00	2950.	17.3	82	1
+27.0	4	140.0	86.00	2790.	15.6	82	1
+44.0	4	97.00	52.00	2130.	24.6	82	2
+32.0	4	135.0	84.00	2295.	11.6	82	1
+28.0	4	120.0	79.00	2625.	18.6	82	1
+31.0	4	119.0	82.00	2720.	19.4	82	1
diff -r 000000000000 -r 1bc26b9636d2 test-data/evaluation_report_classification.html
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/evaluation_report_classification.html	Wed Dec 11 03:29:00 2024 +0000
@@ -0,0 +1,236 @@
+    Model Training Report
+
+    Model Evaluation Report
+
+    Metrics
+
+    | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | PR-AUC-Weighted |
+    |---|---|---|---|---|---|---|---|---|
+    | Light Gradient Boosting Machine | 0.7826 | 0.8162 | 0.7419 | 0.7667 | 0.7541 | 0.5594 | 0.5596 | 0.7753 |
+
+    Plots
+
+    - Confusion_matrix
+    - Auc
+    - Threshold
+    - Pr
+    - Error
+    - Class_report
+    - Learning
+    - Calibration
+    - Vc
+    - Dimension
+    - Manifold
+    - Rfe
+    - Feature
+    - Feature_all
+
+    Model Evaluation Report
+
+    Metrics
+
+    | Model | MAE | MSE | RMSE | R2 | RMSLE | MAPE |
+    |---|---|---|---|---|---|---|
+    | Gradient Boosting Regressor | 1.6 | 5.6214 | 2.3709 | 0.9077 | 0.0875 | 0.0691 |
+
+    Plots
+
+    - Residuals
+    - Error
+    - Cooks
+    - Learning
+    - Vc
+    - Manifold
+    - Rfe
+    - Feature
+    - Feature_all
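+    The PR-AUC-Weighted column in the classification table above appears to be
+    a custom metric added by this tool (base_model_trainer.py imports
+    average_precision_score); a hedged sketch of the computation, with
+    placeholder arrays, might look like:
+
+        import numpy as np
+        from sklearn.metrics import average_precision_score
+        from sklearn.preprocessing import label_binarize
+
+        y_true = np.array([0, 2, 1, 0, 1, 2])              # true class labels
+        y_scores = np.full((6, 3), 1 / 3)                  # per-class probabilities
+        y_bin = label_binarize(y_true, classes=[0, 1, 2])  # one column per class
+        pr_auc = average_precision_score(y_bin, y_scores, average="weighted")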
+    PyCaret Model Training Report
+
+    Navigation: Setup & Best Model | Best Model Plots | Feature Importance | Explainer
+
+    Setup Parameters
+
+    | Parameter | Value |
+    |---|---|
+    | target | PCR |
+    | session_id | 42 |
+    | index | False |
+
+    If you want to know all the experiment setup parameters, please check the
+    PyCaret documentation for the classification/regression exp function.
+
+    Best Model: LGBMClassifier
+
+    | Parameter | Value |
+    |---|---|
+    | boosting_type | gbdt |
+    | class_weight | None |
+    | colsample_bytree | 1.0 |
+    | importance_type | split |
+    | learning_rate | 0.1 |
+    | max_depth | -1 |
+    | min_child_samples | 20 |
+    | min_child_weight | 0.001 |
+    | min_split_gain | 0.0 |
+    | n_estimators | 100 |
+    | n_jobs | -1 |
+    | num_leaves | 31 |
+    | objective | None |
+    | random_state | 42 |
+    | reg_alpha | 0.0 |
+    | reg_lambda | 0.0 |
+    | subsample | 1.0 |
+    | subsample_for_bin | 200000 |
+    | subsample_freq | 0 |
+
+    Comparison Results on the Cross-Validation Set
+
+    | Model | Accuracy | ROC-AUC | Recall | Prec. | F1 | Kappa | MCC | PR-AUC-Weighted | TT (Sec) |
+    |---|---|---|---|---|---|---|---|---|---|
+    | Light Gradient Boosting Machine | 0.715 | 0.6000 | 0.6500 | 0.6917 | 0.6357 | 0.4380 | 0.4748 | 0.6822 | 0.228 |
+    | Logistic Regression | 0.670 | 0.6500 | 0.7500 | 0.6167 | 0.6633 | 0.3478 | 0.3742 | 0.7144 | 0.331 |
+    | Ridge Classifier | 0.670 | 0.6167 | 0.7500 | 0.6167 | 0.6633 | 0.3478 | 0.3742 | 0.0000 | 0.180 |
+    | Naive Bayes | 0.650 | 0.6333 | 0.6833 | 0.5917 | 0.6257 | 0.2969 | 0.3112 | 0.6978 | 2.694 |
+    | Quadratic Discriminant Analysis | 0.610 | 0.6333 | 0.4667 | 0.5333 | 0.4733 | 0.2256 | 0.2488 | 0.7033 | 0.158 |
+    | Linear Discriminant Analysis | 0.605 | 0.6000 | 0.7000 | 0.5900 | 0.6105 | 0.2372 | 0.2577 | 0.6594 | 0.110 |
+    | CatBoost Classifier | 0.595 | 0.6167 | 0.6167 | 0.5500 | 0.5600 | 0.2165 | 0.2207 | 0.6861 | 12.075 |
+    | Extra Trees Classifier | 0.590 | 0.6000 | 0.5833 | 0.5000 | 0.5300 | 0.2103 | 0.2167 | 0.6811 | 0.775 |
+    | SVM - Linear Kernel | 0.585 | 0.6500 | 0.5333 | 0.4667 | 0.4521 | 0.1429 | 0.1690 | 0.0000 | 0.217 |
+    | K Neighbors Classifier | 0.565 | 0.6292 | 0.5000 | 0.5750 | 0.5057 | 0.1413 | 0.1469 | 0.6717 | 0.685 |
+    | Random Forest Classifier | 0.555 | 0.5667 | 0.5833 | 0.5167 | 0.5233 | 0.1524 | 0.1540 | 0.6211 | 0.847 |
+    | Dummy Classifier | 0.540 | 0.5000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.4600 | 0.165 |
+    | Ada Boost Classifier | 0.510 | 0.4417 | 0.5667 | 0.4650 | 0.4971 | 0.0656 | 0.0275 | 0.5819 | 0.645 |
+    | Decision Tree Classifier | 0.495 | 0.5000 | 0.4333 | 0.4333 | 0.4133 | 0.0049 | 0.0040 | 0.5483 | 0.329 |
+    | Gradient Boosting Classifier | 0.475 | 0.4333 | 0.4500 | 0.4000 | 0.4033 | -0.0033 | -0.0239 | 0.5800 | 0.643 |
+    | Extreme Gradient Boosting | 0.460 | 0.4833 | 0.4333 | 0.3333 | 0.3667 | -0.0489 | -0.0537 | 0.6281 | 0.422 |
+
+    Results on the Test Set for the best model
+
+    | Model | Accuracy | ROC-AUC | Recall | Prec. | F1 | Kappa | MCC | PR-AUC-Weighted |
+    |---|---|---|---|---|---|---|---|---|
+    | Light Gradient Boosting Machine | 0.8095 | 0.7454 | 0.6667 | 0.8571 | 0.75 | 0.6 | 0.6124 | 0.6799 |
+
+    Best Model Plots on the testing set
+
+    - Confusion_matrix
+    - Auc
+    - Threshold
+    - Pr
+    - Error
+    - Class_report
+    - Learning
+    - Calibration
+    - Vc
+    - Dimension
+    - Manifold
+    - Rfe
+    - Feature
+    - Feature_all
+
+    PyCaret Feature Importance Report
+
+    Feature importance analysis from a trained Random Forest.
+    Uses Gini impurity for calculating feature importance for classification
+    and variance reduction for regression.
+
+    SHAP Summary from a trained LightGBM.
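+    A hedged sketch of the two feature-importance views named above, using
+    generic scikit-learn, LightGBM, and shap calls (X, y, and the fitted
+    models are placeholders, not the tool's actual objects):
+
+        import shap
+        from lightgbm import LGBMClassifier
+        from sklearn.datasets import make_classification
+        from sklearn.ensemble import RandomForestClassifier
+
+        X, y = make_classification(n_samples=200, n_features=8, random_state=42)
+
+        # Gini-impurity (mean decrease in impurity) importances from a Random Forest
+        rf = RandomForestClassifier(random_state=42).fit(X, y)
+        print(rf.feature_importances_)
+
+        # SHAP summary for a trained LightGBM model
+        lgbm = LGBMClassifier(random_state=42).fit(X, y)
+        shap.summary_plot(shap.TreeExplainer(lgbm).shap_values(X), X)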
+    PyCaret Model Training Report
+
+    Navigation: Setup & Best Model | Best Model Plots | Feature Importance | Explainer
+
+    Setup Parameters
+
+    | Parameter | Value |
+    |---|---|
+    | target | PCR |
+    | session_id | 42 |
+    | index | False |
+    | train_size | 0.8 |
+    | normalize | True |
+    | feature_selection | True |
+    | fold | 5 |
+    | remove_outliers | True |
+    | remove_multicollinearity | True |
+
+    If you want to know all the experiment setup parameters, please check the
+    PyCaret documentation for the classification/regression exp function.
+
+    Best Model: LGBMClassifier
+
+    | Parameter | Value |
+    |---|---|
+    | boosting_type | gbdt |
+    | class_weight | None |
+    | colsample_bytree | 1.0 |
+    | importance_type | split |
+    | learning_rate | 0.1 |
+    | max_depth | -1 |
+    | min_child_samples | 20 |
+    | min_child_weight | 0.001 |
+    | min_split_gain | 0.0 |
+    | n_estimators | 100 |
+    | n_jobs | -1 |
+    | num_leaves | 31 |
+    | objective | None |
+    | random_state | 42 |
+    | reg_alpha | 0.0 |
+    | reg_lambda | 0.0 |
+    | subsample | 1.0 |
+    | subsample_for_bin | 200000 |
+    | subsample_freq | 0 |
+
+    Comparison Results on the Cross-Validation Set
+
+    | Model | Accuracy | ROC-AUC | Recall | Prec. | F1 | Kappa | MCC | PR-AUC-Weighted | TT (Sec) |
+    |---|---|---|---|---|---|---|---|---|---|
+    | Light Gradient Boosting Machine | 0.7091 | 0.6267 | 0.64 | 0.6895 | 0.6467 | 0.4056 | 0.4224 | 0.5918 | 0.322 |
+    | Naive Bayes | 0.6545 | 0.6800 | 0.72 | 0.6117 | 0.6498 | 0.3163 | 0.3232 | 0.6930 | 1.240 |
+    | K Neighbors Classifier | 0.6364 | 0.6467 | 0.56 | 0.6067 | 0.5743 | 0.2603 | 0.2660 | 0.6001 | 0.864 |
+    | Ridge Classifier | 0.6364 | 0.6467 | 0.64 | 0.5962 | 0.6048 | 0.2700 | 0.2835 | 0.0000 | 0.898 |
+    | Random Forest Classifier | 0.6364 | 0.6300 | 0.60 | 0.6343 | 0.6013 | 0.2688 | 0.2834 | 0.6539 | 0.906 |
+    | Logistic Regression | 0.6364 | 0.6400 | 0.64 | 0.5962 | 0.6048 | 0.2700 | 0.2835 | 0.6697 | 0.798 |
+    | Quadratic Discriminant Analysis | 0.6364 | 0.6933 | 0.72 | 0.5851 | 0.6353 | 0.2815 | 0.2899 | 0.7075 | 0.418 |
+    | Linear Discriminant Analysis | 0.6364 | 0.6467 | 0.64 | 0.5962 | 0.6048 | 0.2700 | 0.2835 | 0.6751 | 0.364 |
+    | Gradient Boosting Classifier | 0.6182 | 0.6333 | 0.60 | 0.5843 | 0.5846 | 0.2328 | 0.2389 | 0.6403 | 0.522 |
+    | Ada Boost Classifier | 0.6182 | 0.6567 | 0.60 | 0.5943 | 0.5891 | 0.2340 | 0.2415 | 0.6517 | 0.560 |
+    | Extra Trees Classifier | 0.6182 | 0.5800 | 0.56 | 0.5876 | 0.5622 | 0.2266 | 0.2347 | 0.6413 | 0.468 |
+    | Decision Tree Classifier | 0.6000 | 0.5967 | 0.56 | 0.5867 | 0.5533 | 0.1950 | 0.2060 | 0.5215 | 1.532 |
+    | CatBoost Classifier | 0.5818 | 0.6667 | 0.48 | 0.5133 | 0.4845 | 0.1454 | 0.1414 | 0.6991 | 3.426 |
+    | SVM - Linear Kernel | 0.5455 | 0.5000 | 0.40 | 0.5033 | 0.4332 | 0.0684 | 0.0685 | 0.0000 | 1.666 |
+    | Dummy Classifier | 0.5455 | 0.5000 | 0.00 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.4545 | 0.456 |
+    | Extreme Gradient Boosting | 0.5273 | 0.5600 | 0.52 | 0.4967 | 0.5042 | 0.0550 | 0.0564 | 0.5943 | 0.336 |
+
+    Results on the Test Set for the best model
+
+    | Model | Accuracy | ROC-AUC | Recall | Prec. | F1 | Kappa | MCC | PR-AUC-Weighted |
+    |---|---|---|---|---|---|---|---|---|
+    | Light Gradient Boosting Machine | 0.7857 | 0.7604 | 0.6667 | 0.8 | 0.7273 | 0.5532 | 0.5594 | 0.7502 |
+
+    Best Model Plots on the testing set
+
+    - Confusion_matrix
+    - Auc
+    - Threshold
+    - Pr
+    - Error
+    - Class_report
+    - Learning
+    - Calibration
+    - Vc
+    - Dimension
+    - Manifold
+    - Rfe
+    - Feature
+    - Feature_all
+
+    PyCaret Feature Importance Report
+
+    Feature importance analysis from a trained Random Forest.
+    Uses Gini impurity for calculating feature importance for classification
+    and variance reduction for regression.
+
+    SHAP Summary from a trained LightGBM.
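+    A hedged sketch of reproducing the Setup Parameters shown above directly
+    in PyCaret (values taken from the table; train_df is a placeholder
+    DataFrame containing the PCR target column):
+
+        from pycaret.classification import setup, compare_models
+
+        setup(
+            data=train_df, target="PCR", session_id=42, index=False,
+            train_size=0.8, normalize=True, feature_selection=True,
+            fold=5, remove_outliers=True, remove_multicollinearity=True,
+        )
+        best_model = compare_models()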
+    PyCaret Model Training Report
+
+    Navigation: Setup & Best Model | Best Model Plots | Feature Importance | Explainer
+
+    Setup Parameters
+
+    | Parameter | Value |
+    |---|---|
+    | target | MPG |
+    | session_id | 42 |
+    | index | False |
+
+    If you want to know all the experiment setup parameters, please check the
+    PyCaret documentation for the classification/regression exp function.
+
+    Best Model: GradientBoostingRegressor
+
+    | Parameter | Value |
+    |---|---|
+    | alpha | 0.9 |
+    | ccp_alpha | 0.0 |
+    | criterion | friedman_mse |
+    | init | None |
+    | learning_rate | 0.1 |
+    | loss | squared_error |
+    | max_depth | 3 |
+    | max_features | None |
+    | max_leaf_nodes | None |
+    | min_impurity_decrease | 0.0 |
+    | min_samples_leaf | 1 |
+    | min_samples_split | 2 |
+    | min_weight_fraction_leaf | 0.0 |
+    | n_estimators | 100 |
+    | n_iter_no_change | None |
+    | random_state | 42 |
+    | subsample | 1.0 |
+    | tol | 0.0001 |
+    | validation_fraction | 0.1 |
+    | verbose | 0 |
+    | warm_start | False |
+
+    Comparison Results on the Cross-Validation Set
+
+    | Model | MAE | MSE | RMSE | R2 | RMSLE | MAPE | TT (Sec) |
+    |---|---|---|---|---|---|---|---|
+    | Gradient Boosting Regressor | 2.2775 | 9.8743 | 3.0921 | 0.8383 | 0.1197 | 0.0980 | 0.681 |
+    | Extra Trees Regressor | 2.2119 | 10.2477 | 3.1304 | 0.8323 | 0.1220 | 0.0949 | 2.212 |
+    | Light Gradient Boosting Machine | 2.3218 | 10.4931 | 3.1818 | 0.8282 | 0.1252 | 0.1011 | 0.263 |
+    | CatBoost Regressor | 2.3204 | 10.5063 | 3.1906 | 0.8270 | 0.1256 | 0.1011 | 8.883 |
+    | Random Forest Regressor | 2.3161 | 11.0170 | 3.2515 | 0.8210 | 0.1252 | 0.0990 | 1.916 |
+    | Extreme Gradient Boosting | 2.4277 | 11.9887 | 3.3949 | 0.8045 | 0.1336 | 0.1057 | 0.497 |
+    | Elastic Net | 2.6119 | 12.1337 | 3.4462 | 0.8029 | 0.1426 | 0.1168 | 0.116 |
+    | Lasso Regression | 2.6238 | 12.2869 | 3.4649 | 0.8011 | 0.1438 | 0.1172 | 0.134 |
+    | Lasso Least Angle Regression | 2.6238 | 12.2868 | 3.4649 | 0.8011 | 0.1438 | 0.1172 | 0.157 |
+    | AdaBoost Regressor | 2.5949 | 12.5846 | 3.4968 | 0.7939 | 0.1378 | 0.1153 | 2.469 |
+    | Bayesian Ridge | 2.6494 | 12.5149 | 3.5121 | 0.7920 | 0.1433 | 0.1194 | 0.268 |
+    | Ridge Regression | 2.6852 | 12.7684 | 3.5480 | 0.7872 | 0.1448 | 0.1212 | 0.108 |
+    | Linear Regression | 2.6893 | 12.7997 | 3.5523 | 0.7866 | 0.1450 | 0.1214 | 0.122 |
+    | Least Angle Regression | 2.7583 | 13.3766 | 3.6327 | 0.7759 | 0.1489 | 0.1249 | 0.165 |
+    | Huber Regressor | 2.6780 | 14.2077 | 3.7197 | 0.7699 | 0.1404 | 0.1138 | 1.508 |
+    | Decision Tree Regressor | 2.6552 | 15.5784 | 3.8636 | 0.7507 | 0.1470 | 0.1108 | 0.253 |
+    | Orthogonal Matching Pursuit | 3.3731 | 20.2491 | 4.4464 | 0.6709 | 0.1767 | 0.1475 | 0.418 |
+    | K Neighbors Regressor | 3.4315 | 21.1052 | 4.5405 | 0.6546 | 0.1692 | 0.1448 | 0.858 |
+    | Dummy Regressor | 6.6547 | 62.8366 | 7.8973 | -0.0391 | 0.3303 | 0.3219 | 0.129 |
+    | Passive Aggressive Regressor | 7.5227 | 84.7568 | 9.0993 | -0.4762 | 0.4067 | 0.3652 | 0.420 |
+
+    Results on the Test Set for the best model
+
+    | Model | MAE | MSE | RMSE | R2 | RMSLE | MAPE |
+    |---|---|---|---|---|---|---|
+    | Gradient Boosting Regressor | 2.2015 | 9.911 | 3.1482 | 0.8273 | 0.1198 | 0.094 |
+
+    Best Model Plots on the testing set
+
+    - Residuals
+    - Error
+    - Cooks
+    - Learning
+    - Vc
+    - Manifold
+    - Rfe
+    - Feature
+    - Feature_all
+
+    PyCaret Feature Importance Report
+
+    Feature importance analysis from a trained Random Forest.
+    Uses Gini impurity for calculating feature importance for classification
+    and variance reduction for regression.
+
+    SHAP Summary from a trained LightGBM.
+
+    """
+
+
+def get_html_closing():
+    return """
+        
+        
+    
+    
+    """
+
+
+def customize_figure_layout(fig, margin_dict=None):
+    """
+    Update the layout of a Plotly figure to reduce margins.
+
+    Parameters:
+        fig (plotly.graph_objects.Figure): The Plotly figure to customize.
+        margin_dict (dict, optional): A dictionary specifying margin sizes.
+            Example: {'l': 10, 'r': 10, 't': 10, 'b': 10}
+
+    Returns:
+        plotly.graph_objects.Figure: The updated Plotly figure.
+    """
+    if margin_dict is None:
+        # Set default smaller margins
+        margin_dict = {'l': 40, 'r': 40, 't': 40, 'b': 40}
+
+    fig.update_layout(margin=margin_dict)
+    return fig
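+
+# Hedged usage sketch for customize_figure_layout (illustrative only; the
+# figure is a placeholder, not one produced by this tool):
+#
+#     import plotly.express as px
+#     fig = px.line(x=[0, 1, 2], y=[3, 1, 2])
+#     fig = customize_figure_layout(
+#         fig, margin_dict={'l': 10, 'r': 10, 't': 10, 'b': 10})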
+
+
+def add_plot_to_html(fig, include_plotlyjs=True):
+    custom_margin = {'l': 40, 'r': 40, 't': 60, 'b': 60}
+    fig = customize_figure_layout(fig, margin_dict=custom_margin)
+    return fig.to_html(full_html=False,
+                       default_height=350,
+                       include_plotlyjs="cdn" if include_plotlyjs else False)
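+
+# Design note / hedged usage sketch: when several plots are written into one
+# report, only the first call should embed the Plotly runtime (loaded once
+# from the CDN); later calls pass include_plotlyjs=False so each extra plot
+# contributes only its own data. fig_one and fig_two are placeholders:
+#
+#     html = add_plot_to_html(fig_one, include_plotlyjs=True)
+#     html += add_hr_to_html()
+#     html += add_plot_to_html(fig_two, include_plotlyjs=False)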
+
+
+def add_hr_to_html():
+    return "
"
+
+
+def encode_image_to_base64(image_path):
+    """Convert an image file to a base64 encoded string."""
+    with open(image_path, "rb") as img_file:
+        return base64.b64encode(img_file.read()).decode("utf-8")
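+
+
+# Hedged usage sketch: embedding the encoded image in report HTML as a data
+# URI ("plot.png" is a placeholder path, not a file this tool guarantees):
+#
+#     encoded = encode_image_to_base64("plot.png")
+#     img_tag = f'<img src="data:image/png;base64,{encoded}" />'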