1) General Metrics

Loss: Measures the difference between predicted and actual values (e.g., cross-entropy or mean squared error). Lower is better. It is the quantity optimized during training.

Accuracy: Proportion of correct predictions among all predictions. Simple but can be misleading on imbalanced datasets.

Micro Accuracy: Computes accuracy by aggregating the individual prediction outcomes (correct vs. incorrect) across all classes or labels before dividing by the total, making it suitable for multiclass or multilabel problems.

Token Accuracy: Measures how often the predicted tokens match the true tokens. Useful in sequence prediction tasks such as NLP tagging or generation.
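
A minimal sketch of how these look in code, assuming scikit-learn and NumPy are available; the toy labels, probabilities, and token arrays below are made up purely for illustration:

```python
import numpy as np
from sklearn.metrics import accuracy_score, log_loss

y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]
y_prob = [0.2, 0.9, 0.4, 0.1, 0.8]  # predicted probability of class 1

print("Loss (log loss):", log_loss(y_true, y_prob))  # lower is better
print("Accuracy:", accuracy_score(y_true, y_pred))   # fraction of correct predictions

# Token accuracy: fraction of positions where the predicted token matches the
# reference token, e.g., in a sequence-labelling task.
pred_tokens = np.array([[3, 7, 7, 1], [2, 2, 5, 1]])
true_tokens = np.array([[3, 7, 4, 1], [2, 2, 5, 1]])
print("Token accuracy:", (pred_tokens == true_tokens).mean())
```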

2) Precision, Recall & Specificity

Precision: Out of all positive predictions, how many were correct. Precision = TP / (TP + FP). Helps when false positives are costly.

Recall (Sensitivity): Out of all actual positives, how many were predicted correctly. Recall = TP / (TP + FN). Important when missing positives is risky.

Specificity: True negative rate. Measures how well the model identifies negatives. Specificity = TN / (TN + FP). Useful in medical testing to avoid false alarms.
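
All three fall straight out of the binary confusion matrix. A small sketch assuming scikit-learn; the toy labels are illustrative only:

```python
from sklearn.metrics import confusion_matrix, precision_score, recall_score

y_true = [1, 0, 1, 1, 0, 0, 1, 0]
y_pred = [1, 0, 0, 1, 0, 1, 1, 0]

# For binary labels, ravel() returns the four cells in this order.
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()

precision = tp / (tp + fp)      # TP / (TP + FP)
recall = tp / (tp + fn)         # TP / (TP + FN), a.k.a. sensitivity
specificity = tn / (tn + fp)    # TN / (TN + FP), the true negative rate

print(precision, recall, specificity)
# Same precision and recall via the built-in helpers:
print(precision_score(y_true, y_pred), recall_score(y_true, y_pred))
```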

3) Macro, Micro, and Weighted Averages

Macro Precision / Recall / F1: Averages the metric across all classes, treating each class equally regardless of class frequency. Best when class sizes are balanced or when every class, including rare ones, should count equally.

Micro Precision / Recall / F1: Aggregates TP, FP, and FN across all classes before computing the metric. Gives a global, instance-level view and is common for multilabel and class-imbalanced problems; note that frequent classes contribute the most, and for single-label multiclass data micro F1 equals accuracy.

Weighted Precision / Recall / F1: Averages the metric across classes, weighted by the number of true instances per class. Balances the importance of classes based on frequency.
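
The difference between the averaging modes is easiest to see on a deliberately imbalanced toy example; a sketch assuming scikit-learn, where the `average=` parameter selects the mode:

```python
from sklearn.metrics import f1_score, precision_score, recall_score

y_true = [0, 0, 0, 0, 1, 1, 2]   # class 0 is much more frequent than 1 and 2
y_pred = [0, 0, 1, 0, 1, 2, 2]

for avg in ("macro", "micro", "weighted"):
    p = precision_score(y_true, y_pred, average=avg, zero_division=0)
    r = recall_score(y_true, y_pred, average=avg, zero_division=0)
    f = f1_score(y_true, y_pred, average=avg, zero_division=0)
    print(f"{avg:>8}: precision={p:.2f} recall={r:.2f} f1={f:.2f}")
```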

4) Average Precision (PR-AUC Variants)

Average Precision Macro: Precision-Recall AUC averaged across all classes equally. Useful for balanced multiclass problems.

Average Precision Micro: Global Precision-Recall AUC computed over all instances at once. Best for imbalanced data or multilabel classification.

Average Precision Samples: Precision-Recall AUC averaged across individual samples (not classes). Ideal for multilabel problems where each sample can belong to multiple classes.
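
A sketch of the three variants using scikit-learn's `average_precision_score` on a made-up multilabel problem, where the true labels form a binary indicator matrix and the model outputs one score per label:

```python
import numpy as np
from sklearn.metrics import average_precision_score

# Rows are samples, columns are labels (multilabel indicator matrix).
y_true = np.array([[1, 0, 1],
                   [0, 1, 0],
                   [1, 1, 0],
                   [0, 0, 1]])
# Predicted scores (e.g., probabilities) for each label.
y_score = np.array([[0.8, 0.2, 0.6],
                    [0.3, 0.9, 0.1],
                    [0.7, 0.4, 0.2],
                    [0.1, 0.3, 0.9]])

for avg in ("macro", "micro", "samples"):
    print(avg, average_precision_score(y_true, y_score, average=avg))
```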

5) ROC-AUC Variants

ROC-AUC: Measures the model's ability to distinguish between classes. AUC = 1 is perfect; 0.5 is random guessing. Use for binary classification.

Macro ROC-AUC: Averages the AUC across all classes equally. Suitable when classes are balanced and of equal importance.

Micro ROC-AUC: Computes AUC from aggregated predictions across all classes. Useful in multiclass or multilabel settings with imbalance.
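
A sketch assuming scikit-learn's `roc_auc_score`: the binary case takes positive-class scores, and the macro/micro averages are shown on a made-up multilabel indicator matrix:

```python
import numpy as np
from sklearn.metrics import roc_auc_score

# Binary case: scores are probabilities of the positive class.
y_true = [0, 0, 1, 1]
y_score = [0.1, 0.4, 0.35, 0.8]
print("Binary ROC-AUC:", roc_auc_score(y_true, y_score))

# Multilabel case: indicator matrix of true labels and one score per label.
Y_true = np.array([[1, 0, 1], [0, 1, 0], [1, 1, 0], [0, 0, 1]])
Y_score = np.array([[0.8, 0.2, 0.6], [0.3, 0.9, 0.1], [0.7, 0.4, 0.2], [0.1, 0.3, 0.9]])
print("Macro ROC-AUC:", roc_auc_score(Y_true, Y_score, average="macro"))
print("Micro ROC-AUC:", roc_auc_score(Y_true, Y_score, average="micro"))
```

For single-label multiclass problems, `roc_auc_score` additionally needs `multi_class='ovr'` (or `'ovo'`) to be set.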

6) Ranking Metrics

Hits at K: Measures whether the true label is among the top-K predictions. Common in recommendation systems and retrieval tasks.
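
A small NumPy sketch, assuming exactly one relevant item per query; the `hits_at_k` helper and the toy score matrix are illustrative only:

```python
import numpy as np

def hits_at_k(scores: np.ndarray, true_items: np.ndarray, k: int) -> float:
    """Fraction of queries whose true item appears among the k highest-scored items."""
    top_k = np.argsort(-scores, axis=1)[:, :k]          # indices of the top-k items per query
    hits = (top_k == true_items[:, None]).any(axis=1)   # is the true item in the top k?
    return float(hits.mean())

scores = np.array([[0.1, 0.7, 0.2],    # query 0: item 1 ranked first
                   [0.5, 0.2, 0.3],    # query 1: item 0 first, item 2 second
                   [0.2, 0.3, 0.5]])   # query 2: item 2 ranked first
true_items = np.array([1, 2, 2])       # the single relevant item per query

print(hits_at_k(scores, true_items, k=1))  # 2/3: queries 0 and 2 hit at K=1
print(hits_at_k(scores, true_items, k=2))  # 3/3: query 1's item 2 enters the top 2
```

For plain classification tasks, scikit-learn's `top_k_accuracy_score` computes the same idea directly from class scores.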

7) Confusion Matrix Stats (Per Class)

True Positives / True Negatives (TP / TN): Correct predictions for positives and negatives, respectively.

False Positives / False Negatives (FP / FN): Incorrect predictions: false alarms (FP) and missed detections (FN).
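
The per-class counts can be read off a multiclass confusion matrix; a sketch assuming scikit-learn and NumPy, with toy labels for illustration:

```python
import numpy as np
from sklearn.metrics import confusion_matrix

y_true = [0, 1, 2, 2, 0, 1, 1, 2]
y_pred = [0, 2, 2, 2, 0, 0, 1, 1]

cm = confusion_matrix(y_true, y_pred)   # rows = true classes, columns = predicted classes

tp = np.diag(cm)                        # correct predictions per class
fp = cm.sum(axis=0) - tp                # predicted as this class but actually another class
fn = cm.sum(axis=1) - tp                # actually this class but predicted as another class
tn = cm.sum() - (tp + fp + fn)          # everything else

for cls in range(len(tp)):
    print(f"class {cls}: TP={tp[cls]} FP={fp[cls]} FN={fn[cls]} TN={tn[cls]}")
```

scikit-learn's `multilabel_confusion_matrix` returns the same per-class 2x2 counts directly.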

8) Other Useful Metrics

Cohen's Kappa: Measures agreement between predicted and actual labels, adjusted for the agreement expected by chance. Useful for multiclass classification with imbalanced labels.

Matthews Correlation Coefficient (MCC): Balanced measure of prediction quality that takes TP, TN, FP, and FN into account. Particularly effective for imbalanced datasets.
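
Both are one-line calls in scikit-learn; a minimal sketch on the same toy labels used above:

```python
from sklearn.metrics import cohen_kappa_score, matthews_corrcoef

y_true = [0, 1, 2, 2, 0, 1, 1, 2]
y_pred = [0, 2, 2, 2, 0, 0, 1, 1]

print("Cohen's kappa:", cohen_kappa_score(y_true, y_pred))  # 1 = perfect, 0 = chance-level agreement
print("MCC:", matthews_corrcoef(y_true, y_pred))            # 1 = perfect, 0 = random, -1 = inverse
```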

9) Metric Recommendations

- Use Accuracy + F1 for balanced data.
- Use Precision, Recall, and ROC-AUC for imbalanced datasets.
- Use Average Precision Micro for multilabel or class-imbalanced problems.
- Use Macro scores when all classes should be treated equally.
- Use Weighted scores when class imbalance should be accounted for without ignoring small classes.
- Use Confusion Matrix stats to analyze class-wise performance (see the report sketch after this list).
- Use Hits at K for recommendation or ranking-based tasks.
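
As a closing sketch, scikit-learn's `classification_report` bundles the per-class precision, recall, and F1 with the macro and weighted averages, covering much of the class-wise analysis recommended above in one call (toy labels for illustration):

```python
from sklearn.metrics import classification_report

y_true = [0, 0, 0, 0, 1, 1, 2]
y_pred = [0, 0, 1, 0, 1, 2, 2]

# Per-class precision/recall/F1/support, plus macro and weighted averages.
print(classification_report(y_true, y_pred, zero_division=0))
```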