annotate ludwig_backend.py @ 17:db9be962dc13 draft default tip

planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
author goeckslab
date Wed, 10 Dec 2025 00:24:13 +0000
parents 8729f69e9207
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1 import inspect
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
2 import json
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
3 import logging
16
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
4 import os
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
5 from pathlib import Path
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
6 from typing import Any, Dict, List, Optional, Protocol, Tuple
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
7
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
8 import pandas as pd
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
9 import pandas.api.types as ptypes
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
10 import yaml
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
11 from constants import (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
12 IMAGE_PATH_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
13 LABEL_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
14 MODEL_ENCODER_TEMPLATES,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
15 SPLIT_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
16 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
17 from html_structure import (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
18 build_tabbed_html,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
19 encode_image_to_base64,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
20 format_config_table_html,
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
21 format_dataset_overview_table,
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
22 format_stats_table_html,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
23 format_test_merged_stats_table_html,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
24 format_train_val_stats_table_html,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
25 get_html_closing,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
26 get_html_template,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
27 get_metrics_help_modal,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
28 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
29 from ludwig.globals import (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
30 DESCRIPTION_FILE_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
31 PREDICTIONS_PARQUET_FILE_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
32 TEST_STATISTICS_FILE_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
33 TRAIN_SET_METADATA_FILE_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
34 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
35 from ludwig.utils.data_utils import get_split_path
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
36 from metaformer_setup import get_visualizations_registry, META_DEFAULT_CFGS
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
37 from plotly_plots import (
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
38 build_binary_threshold_plot,
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
39 build_classification_plots,
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
40 build_multiclass_metric_plots,
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
41 build_prediction_diagnostics,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
42 build_regression_test_plots,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
43 build_regression_train_val_plots,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
44 build_train_validation_plots,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
45 )
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
46 from utils import detect_output_type, extract_metrics_from_json
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
47
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
48 logger = logging.getLogger("ImageLearner")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
49
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
50
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
51 class Backend(Protocol):
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
52 """Interface for a machine learning backend."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
53
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
54 def prepare_config(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
55 self,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
56 config_params: Dict[str, Any],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
57 split_config: Dict[str, Any],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
58 ) -> str:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
59 ...
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
60
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
61 def run_experiment(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
62 self,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
63 dataset_path: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
64 config_path: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
65 output_dir: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
66 random_seed: int,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
67 ) -> None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
68 ...
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
69
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
70 def generate_plots(self, output_dir: Path) -> None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
71 ...
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
72
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
73 def generate_html_report(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
74 self,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
75 title: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
76 output_dir: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
77 config: Dict[str, Any],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
78 split_info: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
79 ) -> Path:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
80 ...
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
81
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
82
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
83 class LudwigDirectBackend:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
84 """Backend for running Ludwig experiments directly via the internal experiment_cli function."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
85
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
86 _torchvision_patched = False
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
87
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
88 def _detect_image_dimensions(self, image_zip_path: str) -> Tuple[int, int]:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
89 """Detect image dimensions from the first image in the dataset."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
90 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
91 import zipfile
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
92 from PIL import Image
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
93 import io
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
94
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
95 # Check if image_zip is provided
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
96 if not image_zip_path:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
97 logger.warning("No image zip provided, using default 224x224")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
98 return 224, 224
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
99
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
100 # Extract first image to detect dimensions
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
101 with zipfile.ZipFile(image_zip_path, 'r') as z:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
102 image_files = [f for f in z.namelist() if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
103 if not image_files:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
104 logger.warning("No image files found in zip, using default 224x224")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
105 return 224, 224
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
106
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
107 # Check first image
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
108 with z.open(image_files[0]) as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
109 img = Image.open(io.BytesIO(f.read()))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
110 width, height = img.size
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
111 logger.info(f"Detected image dimensions: {width}x{height}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
112 return height, width # Return as (height, width) to match encoder config
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
113
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
114 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
115 logger.warning(f"Error detecting image dimensions: {e}, using default 224x224")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
116 return 224, 224
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
117
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
118 def prepare_config(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
119 self,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
120 config_params: Dict[str, Any],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
121 split_config: Dict[str, Any],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
122 ) -> str:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
123 logger.info("LudwigDirectBackend: Preparing YAML configuration.")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
124
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
125 model_name = config_params.get("model_name", "resnet18")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
126 use_pretrained = config_params.get("use_pretrained", False)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
127 fine_tune = config_params.get("fine_tune", False)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
128 if use_pretrained:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
129 trainable = bool(fine_tune)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
130 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
131 trainable = True
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
132 epochs = config_params.get("epochs", 10)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
133 batch_size = config_params.get("batch_size")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
134 num_processes = config_params.get("preprocessing_num_processes", 1)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
135 early_stop = config_params.get("early_stop", None)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
136 learning_rate = config_params.get("learning_rate")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
137 learning_rate = "auto" if learning_rate is None else float(learning_rate)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
138 raw_encoder = MODEL_ENCODER_TEMPLATES.get(model_name, model_name)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
139
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
140 # --- MetaFormer detection and config logic ---
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
141 def _is_metaformer(name: str) -> bool:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
142 return isinstance(name, str) and name.startswith(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
143 (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
144 "identityformer_",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
145 "randformer_",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
146 "poolformerv2_",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
147 "convformer_",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
148 "caformer_",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
149 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
150 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
151
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
152 # Check if this is a MetaFormer model (either direct name or in custom_model)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
153 is_metaformer = (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
154 _is_metaformer(model_name)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
155 or (isinstance(raw_encoder, dict) and "custom_model" in raw_encoder and _is_metaformer(raw_encoder["custom_model"]))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
156 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
157
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
158 metaformer_resize: Optional[Tuple[int, int]] = None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
159 metaformer_channels = 3
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
160
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
161 if is_metaformer:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
162 # Handle MetaFormer models
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
163 custom_model = None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
164 if isinstance(raw_encoder, dict) and "custom_model" in raw_encoder:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
165 custom_model = raw_encoder["custom_model"]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
166 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
167 custom_model = model_name
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
168
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
169 logger.info(f"DETECTED MetaFormer model: {custom_model}")
16
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
170 # Stash the model name for patched Stacked2DCNN in case Ludwig drops custom_model from kwargs
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
171 try:
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
172 from MetaFormer.metaformer_stacked_cnn import set_current_metaformer_model
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
173
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
174 set_current_metaformer_model(custom_model)
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
175 except Exception:
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
176 logger.debug("Could not set current MetaFormer model hint; proceeding without global override")
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
177 # Also pass via environment to survive process boundaries (e.g., Ray workers)
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
178 os.environ["GLEAM_META_FORMER_MODEL"] = custom_model
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
179 cfg_channels, cfg_height, cfg_width = 3, 224, 224
16
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
180 model_cfg = {}
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
181 if META_DEFAULT_CFGS:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
182 model_cfg = META_DEFAULT_CFGS.get(custom_model, {})
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
183 input_size = model_cfg.get("input_size")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
184 if isinstance(input_size, (list, tuple)) and len(input_size) == 3:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
185 cfg_channels, cfg_height, cfg_width = (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
186 int(input_size[0]),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
187 int(input_size[1]),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
188 int(input_size[2]),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
189 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
190
16
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
191 weights_url = None
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
192 if isinstance(model_cfg, dict):
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
193 weights_url = model_cfg.get("url")
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
194 logger.info(
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
195 "MetaFormer cfg lookup: model=%s has_cfg=%s url=%s use_pretrained=%s",
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
196 custom_model,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
197 bool(model_cfg),
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
198 weights_url,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
199 use_pretrained,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
200 )
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
201 if use_pretrained and not weights_url:
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
202 logger.warning(
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
203 "MetaFormer pretrained requested for %s but no URL found in default cfgs; model will be randomly initialized",
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
204 custom_model,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
205 )
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
206
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
207 resize_value = config_params.get("image_resize")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
208 if resize_value and resize_value != "original":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
209 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
210 dimensions = resize_value.split("x")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
211 if len(dimensions) == 2:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
212 target_height, target_width = int(dimensions[0]), int(dimensions[1])
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
213 if target_height <= 0 or target_width <= 0:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
214 raise ValueError(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
215 f"Image resize must be positive integers, received {resize_value}."
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
216 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
217 logger.info(f"MetaFormer explicit resize: {target_height}x{target_width}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
218 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
219 raise ValueError(resize_value)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
220 except (ValueError, IndexError):
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
221 logger.warning(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
222 "Invalid image resize format '%s'; falling back to model default %sx%s",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
223 resize_value,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
224 cfg_height,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
225 cfg_width,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
226 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
227 target_height, target_width = cfg_height, cfg_width
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
228 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
229 image_zip_path = config_params.get("image_zip", "")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
230 detected_height, detected_width = self._detect_image_dimensions(image_zip_path)
16
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
231 target_height, target_width = detected_height, detected_width
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
232 if use_pretrained and (detected_height, detected_width) != (cfg_height, cfg_width):
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
233 logger.info(
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
234 "MetaFormer pretrained weights expect %sx%s; proceeding with detected %sx%s",
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
235 cfg_height,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
236 cfg_width,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
237 detected_height,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
238 detected_width,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
239 )
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
240 if target_height <= 0 or target_width <= 0:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
241 raise ValueError(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
242 f"Invalid detected image dimensions for MetaFormer: {target_height}x{target_width}."
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
243 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
244
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
245 metaformer_channels = cfg_channels
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
246 metaformer_resize = (target_height, target_width)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
247
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
248 encoder_config = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
249 "type": "stacked_cnn",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
250 "height": target_height,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
251 "width": target_width,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
252 "num_channels": metaformer_channels,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
253 "output_size": 128,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
254 "use_pretrained": use_pretrained,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
255 "trainable": trainable,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
256 "custom_model": custom_model,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
257 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
258
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
259 elif isinstance(raw_encoder, dict):
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
260 # Handle image resize for regular encoders
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
261 # Note: Standard encoders like ResNet don't support height/width parameters
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
262 # Resize will be handled at the preprocessing level by Ludwig
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
263 if config_params.get("image_resize") and config_params["image_resize"] != "original":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
264 logger.info(f"Resize requested: {config_params['image_resize']} for standard encoder. Resize will be handled at preprocessing level.")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
265
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
266 encoder_config = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
267 **raw_encoder,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
268 "use_pretrained": use_pretrained,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
269 "trainable": trainable,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
270 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
271 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
272 encoder_config = {"type": raw_encoder}
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
273
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
274 # Set a human-friendly architecture string for reporting
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
275 arch_display = None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
276 if is_metaformer and custom_model:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
277 arch_display = str(custom_model)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
278 elif isinstance(raw_encoder, dict):
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
279 enc_type = raw_encoder.get("type")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
280 enc_variant = raw_encoder.get("model_variant")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
281 if enc_type:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
282 base = str(enc_type).replace("_", " ").title()
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
283 arch_display = f"{base} {enc_variant}" if enc_variant is not None else base
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
284 else:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
285 arch_display = str(raw_encoder).replace("_", " ").title()
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
286
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
287 if not arch_display:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
288 arch_display = str(model_name)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
289 config_params["architecture"] = arch_display
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
290
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
291 batch_size_cfg = batch_size or "auto"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
292
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
293 label_column_path = config_params.get("label_column_data_path")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
294 label_series = None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
295 label_metadata_hint = config_params.get("label_metadata") or {}
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
296 output_type_hint = config_params.get("output_type_hint")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
297 num_unique_labels = int(label_metadata_hint.get("num_unique", 2))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
298 numeric_binary_labels = bool(label_metadata_hint.get("is_numeric_binary", False))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
299 likely_regression = bool(label_metadata_hint.get("likely_regression", False))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
300 if label_column_path is not None and Path(label_column_path).exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
301 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
302 label_series = pd.read_csv(label_column_path)[LABEL_COLUMN_NAME]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
303 non_na = label_series.dropna()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
304 if not non_na.empty:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
305 num_unique_labels = non_na.nunique()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
306 is_numeric = ptypes.is_numeric_dtype(label_series.dtype)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
307 numeric_binary_labels = is_numeric and num_unique_labels == 2
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
308 likely_regression = (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
309 is_numeric and not numeric_binary_labels and num_unique_labels > 10
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
310 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
311 if numeric_binary_labels:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
312 logger.info(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
313 "Detected numeric binary labels in '%s'; configuring Ludwig for binary classification.",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
314 LABEL_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
315 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
316 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
317 logger.warning(f"Could not read label column for task detection: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
318
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
319 if output_type_hint == "binary":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
320 num_unique_labels = 2
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
321 numeric_binary_labels = numeric_binary_labels or bool(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
322 label_metadata_hint.get("is_numeric", False)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
323 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
324
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
325 if numeric_binary_labels:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
326 task_type = "classification"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
327 elif likely_regression:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
328 task_type = "regression"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
329 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
330 task_type = "classification"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
331
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
332 if task_type == "regression" and numeric_binary_labels:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
333 logger.warning(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
334 "Numeric binary labels detected but regression task chosen; forcing classification to avoid invalid Ludwig config."
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
335 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
336 task_type = "classification"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
337
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
338 config_params["task_type"] = task_type
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
339
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
340 image_feat: Dict[str, Any] = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
341 "name": IMAGE_PATH_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
342 "type": "image",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
343 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
344 # Set preprocessing dimensions FIRST for MetaFormer models
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
345 if is_metaformer:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
346 if metaformer_resize is None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
347 metaformer_resize = (224, 224)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
348 height, width = metaformer_resize
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
349
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
350 # CRITICAL: Set preprocessing dimensions FIRST for MetaFormer models
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
351 # This is essential for MetaFormer models to work properly
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
352 if "preprocessing" not in image_feat:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
353 image_feat["preprocessing"] = {}
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
354 image_feat["preprocessing"]["height"] = height
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
355 image_feat["preprocessing"]["width"] = width
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
356 # Use infer_image_dimensions=True to allow Ludwig to read images for validation
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
357 # but set explicit max dimensions to control the output size
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
358 image_feat["preprocessing"]["infer_image_dimensions"] = True
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
359 image_feat["preprocessing"]["infer_image_max_height"] = height
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
360 image_feat["preprocessing"]["infer_image_max_width"] = width
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
361 image_feat["preprocessing"]["num_channels"] = metaformer_channels
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
362 image_feat["preprocessing"]["resize_method"] = "interpolate" # Use interpolation for better quality
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
363 image_feat["preprocessing"]["standardize_image"] = "imagenet1k" # Use ImageNet standardization
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
364 # Force Ludwig to respect our dimensions by setting additional parameters
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
365 image_feat["preprocessing"]["requires_equal_dimensions"] = False
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
366 logger.info(f"Set preprocessing dimensions for MetaFormer: {height}x{width} (infer_dimensions=True with max dimensions to allow validation)")
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
367 config_params["image_size"] = f"{height}x{width}"
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
368 # Now set the encoder configuration
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
369 image_feat["encoder"] = encoder_config
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
370
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
371 if config_params.get("augmentation") is not None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
372 image_feat["augmentation"] = config_params["augmentation"]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
373
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
374 # Add resize configuration for standard encoders (ResNet, etc.)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
375 # FIXED: MetaFormer models now respect user dimensions completely
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
376 # Previously there was a double resize issue where MetaFormer would force 224x224
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
377 # Now both MetaFormer and standard encoders respect user's resize choice
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
378 if (not is_metaformer) and config_params.get("image_resize") and config_params["image_resize"] != "original":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
379 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
380 dimensions = config_params["image_resize"].split("x")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
381 if len(dimensions) == 2:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
382 height, width = int(dimensions[0]), int(dimensions[1])
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
383 if height <= 0 or width <= 0:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
384 raise ValueError(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
385 f"Image resize must be positive integers, received {config_params['image_resize']}."
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
386 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
387
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
388 # Add resize to preprocessing for standard encoders
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
389 if "preprocessing" not in image_feat:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
390 image_feat["preprocessing"] = {}
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
391 image_feat["preprocessing"]["height"] = height
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
392 image_feat["preprocessing"]["width"] = width
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
393 # Use infer_image_dimensions=True to allow Ludwig to read images for validation
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
394 # but set explicit max dimensions to control the output size
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
395 image_feat["preprocessing"]["infer_image_dimensions"] = True
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
396 image_feat["preprocessing"]["infer_image_max_height"] = height
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
397 image_feat["preprocessing"]["infer_image_max_width"] = width
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
398 logger.info(f"Added resize preprocessing: {height}x{width} for standard encoder with infer_image_dimensions=True and max dimensions")
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
399 config_params["image_size"] = f"{height}x{width}"
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
400 except (ValueError, IndexError):
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
401 logger.warning(f"Invalid image resize format: {config_params['image_resize']}, skipping resize preprocessing")
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
402 elif not is_metaformer:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
403 # No explicit resize provided; keep for reporting purposes
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
404 config_params.setdefault("image_size", "original")
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
405
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
406 def _resolve_validation_metric(task: str, requested: Optional[str]) -> Optional[str]:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
407 """Pick a validation metric that Ludwig will accept for the resolved task."""
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
408 default_map = {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
409 "regression": "pearson_r",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
410 "binary": "roc_auc",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
411 "category": "accuracy",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
412 }
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
413 allowed_map = {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
414 "regression": {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
415 "pearson_r",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
416 "mean_absolute_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
417 "mean_squared_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
418 "root_mean_squared_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
419 "mean_absolute_percentage_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
420 "r2",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
421 "explained_variance",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
422 "loss",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
423 },
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
424 # Ludwig rejects f1 and balanced_accuracy for binary outputs; keep to known-safe set.
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
425 "binary": {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
426 "roc_auc",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
427 "accuracy",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
428 "precision",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
429 "recall",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
430 "specificity",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
431 "log_loss",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
432 "loss",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
433 },
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
434 "category": {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
435 "accuracy",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
436 "balanced_accuracy",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
437 "precision",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
438 "recall",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
439 "f1",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
440 "specificity",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
441 "log_loss",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
442 "loss",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
443 },
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
444 }
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
445 alias_map = {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
446 "regression": {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
447 "mae": "mean_absolute_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
448 "mse": "mean_squared_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
449 "rmse": "root_mean_squared_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
450 "mape": "mean_absolute_percentage_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
451 },
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
452 }
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
453
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
454 default_metric = default_map.get(task)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
455 allowed = allowed_map.get(task, set())
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
456 metric = requested or default_metric
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
457
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
458 if metric is None:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
459 return None
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
460
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
461 metric = alias_map.get(task, {}).get(metric, metric)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
462
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
463 if metric not in allowed:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
464 if requested:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
465 logger.warning(
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
466 f"Validation metric '{requested}' is not supported for {task} outputs; using '{default_metric}' instead."
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
467 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
468 metric = default_metric
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
469 return metric
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
470
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
471 if task_type == "regression":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
472 output_feat = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
473 "name": LABEL_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
474 "type": "number",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
475 "decoder": {"type": "regressor"},
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
476 "loss": {"type": "mean_squared_error"},
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
477 }
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
478 val_metric = _resolve_validation_metric("regression", config_params.get("validation_metric"))
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
479
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
480 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
481 if num_unique_labels == 2:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
482 output_feat = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
483 "name": LABEL_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
484 "type": "binary",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
485 "loss": {"type": "binary_weighted_cross_entropy"},
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
486 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
487 if config_params.get("threshold") is not None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
488 output_feat["threshold"] = float(config_params["threshold"])
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
489 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
490 output_feat = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
491 "name": LABEL_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
492 "type": "category",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
493 "loss": {"type": "softmax_cross_entropy"},
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
494 }
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
495 val_metric = _resolve_validation_metric(
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
496 "binary" if num_unique_labels == 2 else "category",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
497 config_params.get("validation_metric"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
498 )
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
499
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
500 # Propagate the resolved validation metric (including any task-based fallback or alias normalization)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
501 config_params["validation_metric"] = val_metric
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
502
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
503 conf: Dict[str, Any] = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
504 "model_type": "ecd",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
505 "input_features": [image_feat],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
506 "output_features": [output_feat],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
507 "combiner": {"type": "concat"},
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
508 "trainer": {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
509 "epochs": epochs,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
510 "early_stop": early_stop,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
511 "batch_size": batch_size_cfg,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
512 "learning_rate": learning_rate,
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
513 # set validation_metric when provided
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
514 **({"validation_metric": val_metric} if val_metric else {}),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
515 },
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
516 "preprocessing": {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
517 "split": split_config,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
518 "num_processes": num_processes,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
519 "in_memory": False,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
520 },
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
521 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
522
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
523 logger.debug("LudwigDirectBackend: Config dict built.")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
524 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
525 yaml_str = yaml.dump(conf, sort_keys=False, indent=2)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
526 logger.info("LudwigDirectBackend: YAML config generated.")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
527 return yaml_str
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
528 except Exception:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
529 logger.error(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
530 "LudwigDirectBackend: Failed to serialize YAML.",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
531 exc_info=True,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
532 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
533 raise
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
534
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
535 def _patch_torchvision_download(self) -> None:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
536 """
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
537 Torchvision weight downloads sometimes fail checksum validation behind
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
538 corporate proxies that rewrite binaries. Skip hash checking to allow
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
539 pre-trained weights to load in those environments.
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
540 """
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
541 if LudwigDirectBackend._torchvision_patched:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
542 return
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
543 try:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
544 import torch.hub as torch_hub
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
545
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
546 original = torch_hub.load_state_dict_from_url
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
547 original_download = torch_hub.download_url_to_file
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
548
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
549 def _no_hash(url, model_dir=None, map_location=None, progress=True, check_hash=False, file_name=None):
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
550 return original(
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
551 url,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
552 model_dir=model_dir,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
553 map_location=map_location,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
554 progress=progress,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
555 check_hash=False,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
556 file_name=file_name,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
557 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
558
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
559 def _download_no_hash(url, dst, hash_prefix=None, progress=True):
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
560 # Torchvision's download_url_to_file signature does not accept check_hash in older versions.
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
561 return original_download(url, dst, hash_prefix=None, progress=progress)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
562
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
563 torch_hub.load_state_dict_from_url = _no_hash # type: ignore[assignment]
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
564 torch_hub.download_url_to_file = _download_no_hash # type: ignore[assignment]
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
565 LudwigDirectBackend._torchvision_patched = True
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
566 logger.info("Disabled torchvision weight hash verification to avoid proxy-corrupted downloads.")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
567 except Exception as exc:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
568 logger.warning(f"Could not patch torchvision download hash check: {exc}")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
569
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
570 def run_experiment(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
571 self,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
572 dataset_path: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
573 config_path: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
574 output_dir: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
575 random_seed: int = 42,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
576 ) -> None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
577 """Invoke Ludwig's internal experiment_cli function to run the experiment."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
578 logger.info("LudwigDirectBackend: Starting experiment execution.")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
579
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
580 # Avoid strict hash validation for torchvision weights (common in proxied environments)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
581 self._patch_torchvision_download()
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
582
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
583 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
584 from ludwig.experiment import experiment_cli
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
585 except ImportError as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
586 logger.error(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
587 "LudwigDirectBackend: Could not import experiment_cli.",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
588 exc_info=True,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
589 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
590 raise RuntimeError("Ludwig import failed.") from e
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
591
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
592 output_dir.mkdir(parents=True, exist_ok=True)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
593
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
594 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
595 experiment_cli(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
596 dataset=str(dataset_path),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
597 config=str(config_path),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
598 output_directory=str(output_dir),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
599 random_seed=random_seed,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
600 skip_preprocessing=True,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
601 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
602 logger.info(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
603 f"LudwigDirectBackend: Experiment completed. Results in {output_dir}"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
604 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
605 except TypeError as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
606 logger.error(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
607 "LudwigDirectBackend: Argument mismatch in experiment_cli call.",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
608 exc_info=True,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
609 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
610 raise RuntimeError("Ludwig argument error.") from e
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
611 except Exception:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
612 logger.error(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
613 "LudwigDirectBackend: Experiment execution error.",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
614 exc_info=True,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
615 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
616 raise
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
617
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
618 def get_training_process(self, output_dir) -> Optional[Dict[str, Any]]:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
619 """Retrieve the learning rate used in the most recent Ludwig run."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
620 output_dir = Path(output_dir)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
621 exp_dirs = sorted(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
622 output_dir.glob("experiment_run*"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
623 key=lambda p: p.stat().st_mtime,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
624 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
625
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
626 if not exp_dirs:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
627 logger.warning(f"No experiment run directories found in {output_dir}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
628 return None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
629
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
630 progress_file = exp_dirs[-1] / "model" / "training_progress.json"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
631 if not progress_file.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
632 logger.warning(f"No training_progress.json found in {progress_file}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
633 return None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
634
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
635 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
636 with progress_file.open("r", encoding="utf-8") as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
637 data = json.load(f)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
638 return {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
639 "learning_rate": data.get("learning_rate"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
640 "batch_size": data.get("batch_size"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
641 "epoch": data.get("epoch"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
642 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
643 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
644 logger.warning(f"Failed to read training progress info: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
645 return {}
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
646
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
647 def convert_parquet_to_csv(self, output_dir: Path):
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
648 """Convert the predictions Parquet file to CSV."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
649 output_dir = Path(output_dir)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
650 exp_dirs = sorted(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
651 output_dir.glob("experiment_run*"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
652 key=lambda p: p.stat().st_mtime,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
653 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
654 if not exp_dirs:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
655 logger.warning(f"No experiment run dirs found in {output_dir}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
656 return
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
657 exp_dir = exp_dirs[-1]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
658 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
659 csv_path = exp_dir / "predictions.csv"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
660
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
661 # Check if parquet file exists before trying to convert
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
662 if not parquet_path.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
663 logger.info(f"Predictions parquet file not found at {parquet_path}, skipping conversion")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
664 return
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
665
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
666 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
667 df = pd.read_parquet(parquet_path)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
668 df.to_csv(csv_path, index=False)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
669 logger.info(f"Converted Parquet to CSV: {csv_path}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
670 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
671 logger.error(f"Error converting Parquet to CSV: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
672
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
673 @staticmethod
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
674 def _extract_metric_series(stats: Dict[str, Any], split: str, prefer: Optional[str] = None) -> Tuple[Optional[str], Optional[List[float]]]:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
675 """Pull the first numeric metric list we can find for the requested split."""
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
676 if not isinstance(stats, dict):
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
677 return None, None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
678
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
679 split_stats = stats.get(split, {})
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
680 ordered_metrics: List[Tuple[str, List[float]]] = []
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
681
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
682 def _append_metrics(metric_map: Dict[str, Any]) -> None:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
683 for metric_name, values in metric_map.items():
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
684 if isinstance(values, list) and any(isinstance(v, (int, float)) for v in values):
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
685 ordered_metrics.append((metric_name, values))
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
686
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
687 if isinstance(split_stats, dict):
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
688 combined = split_stats.get("combined")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
689 if isinstance(combined, dict):
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
690 _append_metrics(combined)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
691
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
692 for feature_name, feature_metrics in split_stats.items():
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
693 if feature_name == "combined" or not isinstance(feature_metrics, dict):
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
694 continue
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
695 _append_metrics(feature_metrics)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
696
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
697 if prefer:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
698 for metric_name, values in ordered_metrics:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
699 if metric_name == prefer:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
700 return metric_name, values
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
701
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
702 return ordered_metrics[0] if ordered_metrics else (None, None)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
703
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
704 def generate_plots(self, output_dir: Path) -> None:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
705 """Generate Ludwig visualizations (train/val + test) for the latest experiment run."""
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
706 logger.info("Generating Ludwig visualizations (train/val + test)…")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
707
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
708 # Train/validation visualizations
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
709 train_plots = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
710 "learning_curves",
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
711 }
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
712
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
713 # Test visualizations (multi-class transparency)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
714 test_plots = {
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
715 "confusion_matrix",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
716 "compare_performance",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
717 "compare_classifiers_multiclass_multimetric",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
718 "frequency_vs_f1",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
719 "confidence_thresholding",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
720 "confidence_thresholding_data_vs_acc",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
721 "confidence_thresholding_data_vs_acc_subset",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
722 "confidence_thresholding_data_vs_acc_subset_per_class",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
723 # Binary-only visualizations will still be attempted; multi-class replacements handled elsewhere
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
724 "binary_threshold_vs_metric",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
725 "roc_curves",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
726 "precision_recall_curves",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
727 "calibration_1_vs_all",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
728 "calibration_multiclass",
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
729 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
730
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
731 output_dir = Path(output_dir)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
732 exp_dirs = sorted(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
733 output_dir.glob("experiment_run*"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
734 key=lambda p: p.stat().st_mtime,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
735 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
736 if not exp_dirs:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
737 logger.warning(f"No experiment run dirs found in {output_dir}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
738 return
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
739 exp_dir = exp_dirs[-1]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
740
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
741 viz_dir = exp_dir / "visualizations"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
742 viz_dir.mkdir(exist_ok=True)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
743 train_viz = viz_dir / "train"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
744 test_viz = viz_dir / "test"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
745 train_viz.mkdir(parents=True, exist_ok=True)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
746 test_viz.mkdir(parents=True, exist_ok=True)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
747
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
748 def _check(p: Path) -> Optional[str]:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
749 return str(p) if p.exists() else None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
750
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
751 training_stats = _check(exp_dir / "training_statistics.json")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
752 test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
753 gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
754
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
755 dataset_path = None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
756 split_file = None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
757 desc = exp_dir / DESCRIPTION_FILE_NAME
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
758 if desc.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
759 with open(desc, "r") as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
760 cfg = json.load(f)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
761 dataset_path = _check(Path(cfg.get("dataset", "")))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
762 split_file = _check(Path(get_split_path(cfg.get("dataset", ""))))
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
763 model_name = cfg.get("model_name", "model")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
764 else:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
765 model_name = "model"
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
766
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
767 output_feature = ""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
768 if desc.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
769 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
770 output_feature = cfg["config"]["output_features"][0]["name"]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
771 except Exception:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
772 pass
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
773 if not output_feature and test_stats:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
774 with open(test_stats, "r") as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
775 stats = json.load(f)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
776 output_feature = next(iter(stats.keys()), "")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
777
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
778 probs_path = None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
779 prob_candidates = [
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
780 exp_dir / f"{LABEL_COLUMN_NAME}_probabilities.csv",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
781 exp_dir / f"{output_feature}_probabilities.csv" if output_feature else None,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
782 exp_dir / "probabilities.csv",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
783 exp_dir / "predictions.csv",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
784 exp_dir / PREDICTIONS_PARQUET_FILE_NAME,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
785 ]
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
786 for cand in prob_candidates:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
787 if cand and Path(cand).exists():
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
788 probs_path = str(cand)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
789 break
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
790
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
791 viz_registry = get_visualizations_registry()
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
792 if not viz_registry:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
793 logger.warning(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
794 "Ludwig visualizations registry not available; train/test PNGs will be skipped."
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
795 )
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
796 return
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
797
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
798 base_kwargs = {
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
799 "training_statistics": [training_stats] if training_stats else [],
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
800 "test_statistics": [test_stats] if test_stats else [],
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
801 "probabilities": [probs_path] if probs_path else [],
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
802 "output_feature_name": output_feature,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
803 "ground_truth_split": 2,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
804 "top_n_classes": [20],
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
805 "top_k": 3,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
806 "metrics": ["f1", "precision", "recall", "accuracy"],
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
807 "positive_label": 0,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
808 "ground_truth_metadata": gt_metadata,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
809 "ground_truth": dataset_path,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
810 "split_file": split_file,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
811 "output_directory": None, # set per plot below
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
812 "normalize": False,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
813 "file_format": "png",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
814 "model_names": [model_name],
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
815 }
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
816 for viz_name, viz_func in viz_registry.items():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
817 if viz_name in train_plots:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
818 viz_dir_plot = train_viz
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
819 elif viz_name in test_plots:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
820 viz_dir_plot = test_viz
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
821 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
822 continue
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
823
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
824 try:
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
825 # Build per-viz kwargs based on the function signature to avoid unexpected args
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
826 sig_params = set(inspect.signature(viz_func).parameters.keys())
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
827 call_kwargs = {
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
828 k: v
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
829 for k, v in base_kwargs.items()
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
830 if k in sig_params and v is not None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
831 }
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
832 if "output_directory" in sig_params:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
833 call_kwargs["output_directory"] = str(viz_dir_plot)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
834
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
835 viz_func(
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
836 **call_kwargs,
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
837 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
838 logger.info(f"✔ Generated {viz_name}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
839 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
840 logger.warning(f"✘ Skipped {viz_name}: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
841 logger.info(f"All visualizations written to {viz_dir}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
842
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
843 def generate_html_report(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
844 self,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
845 title: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
846 output_dir: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
847 config: dict,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
848 split_info: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
849 ) -> Path:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
850 """Assemble an HTML report from visualizations under train_val/ and test/ folders."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
851 cwd = Path.cwd()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
852 report_name = title.lower().replace(" ", "_") + "_report.html"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
853 report_path = cwd / report_name
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
854 output_dir = Path(output_dir)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
855 output_type = None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
856
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
857 exp_dirs = sorted(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
858 output_dir.glob("experiment_run*"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
859 key=lambda p: p.stat().st_mtime,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
860 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
861 if not exp_dirs:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
862 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
863 exp_dir = exp_dirs[-1]
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
864 train_set_metadata_path = exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
865 label_metadata_path = config.get("label_column_data_path")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
866 if label_metadata_path:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
867 label_metadata_path = Path(label_metadata_path)
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
868 dataset_path_from_desc: Optional[Path] = None
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
869
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
870 # Pull additional config details from description.json if available
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
871 config_for_summary = dict(config)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
872 if "target_column" not in config_for_summary or not config_for_summary.get("target_column"):
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
873 config_for_summary["target_column"] = LABEL_COLUMN_NAME
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
874 desc_path = exp_dir / DESCRIPTION_FILE_NAME
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
875 if desc_path.exists():
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
876 try:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
877 with open(desc_path, "r") as f:
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
878 desc_json = json.load(f)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
879 desc_cfg = desc_json.get("config", {}) if isinstance(desc_json, dict) else {}
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
880 encoder_cfg = (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
881 desc_cfg.get("input_features", [{}])[0].get("encoder", {})
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
882 if isinstance(desc_cfg.get("input_features", [{}]), list)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
883 else {}
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
884 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
885 output_cfg = (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
886 desc_cfg.get("output_features", [{}])[0]
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
887 if isinstance(desc_cfg.get("output_features", [{}]), list)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
888 else {}
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
889 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
890 trainer_cfg = desc_cfg.get("trainer", {}) if isinstance(desc_cfg, dict) else {}
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
891 loss_cfg = output_cfg.get("loss", {}) if isinstance(output_cfg, dict) else {}
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
892 opt_cfg = trainer_cfg.get("optimizer", {}) if isinstance(trainer_cfg, dict) else {}
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
893 clip_cfg = trainer_cfg.get("gradient_clipping", {}) if isinstance(trainer_cfg, dict) else {}
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
894
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
895 arch_type = encoder_cfg.get("type")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
896 arch_variant = encoder_cfg.get("model_variant")
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
897 arch_custom = encoder_cfg.get("custom_model")
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
898 arch_name = None
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
899 if arch_custom:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
900 arch_name = str(arch_custom)
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
901 if arch_type:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
902 arch_base = str(arch_type).replace("_", " ").title()
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
903 arch_type_name = (
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
904 f"{arch_base} {arch_variant}" if arch_variant is not None else arch_base
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
905 )
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
906 # Prefer explicit custom model names (e.g., MetaFormer) but fall back to encoder type
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
907 arch_name = arch_name or arch_type_name
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
908 if not arch_name and config.get("model_name"):
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
909 # As a last resort, show the user-selected model name (handles custom/MetaFormer cases)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
910 arch_name = str(config.get("model_name"))
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
911
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
912 summary_fields = {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
913 "architecture": arch_name,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
914 "model_variant": arch_variant,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
915 "pretrained": encoder_cfg.get("use_pretrained"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
916 "trainable": encoder_cfg.get("trainable"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
917 "target_column": output_cfg.get("column"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
918 "task_type": output_cfg.get("type"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
919 "validation_metric": trainer_cfg.get("validation_metric"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
920 "loss_function": loss_cfg.get("type"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
921 "threshold": output_cfg.get("threshold"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
922 "total_epochs": trainer_cfg.get("epochs"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
923 "early_stop": trainer_cfg.get("early_stop"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
924 "batch_size": trainer_cfg.get("batch_size"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
925 "optimizer": opt_cfg.get("type"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
926 "learning_rate": trainer_cfg.get("learning_rate"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
927 "random_seed": desc_cfg.get("random_seed") or config.get("random_seed"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
928 "use_mixed_precision": trainer_cfg.get("use_mixed_precision"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
929 "gradient_clipping": clip_cfg.get("clipglobalnorm"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
930 }
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
931 for k, v in summary_fields.items():
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
932 if v is None:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
933 continue
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
934 # Do not override user-passed target/image column names in config
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
935 if k in {"target_column", "image_column"} and config_for_summary.get(k):
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
936 continue
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
937 config_for_summary.setdefault(k, v)
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
938
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
939 dataset_field = None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
940 if isinstance(desc_json, dict):
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
941 dataset_field = desc_json.get("dataset") or desc_cfg.get("dataset")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
942 if dataset_field:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
943 try:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
944 dataset_path_from_desc = Path(dataset_field)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
945 except TypeError:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
946 dataset_path_from_desc = None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
947 if dataset_path_from_desc and (not label_metadata_path or not label_metadata_path.exists()):
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
948 label_metadata_path = dataset_path_from_desc
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
949 except Exception as e: # pragma: no cover - defensive
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
950 logger.warning(f"Could not merge description.json into config summary: {e}")
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
951
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
952 base_viz_dir = exp_dir / "visualizations"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
953 train_viz_dir = base_viz_dir / "train"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
954
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
955 html = get_html_template()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
956
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
957 # Extra CSS & JS: center Plotly and enable CSV download for predictions table
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
958 html += """
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
959 <style>
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
960 /* Center Plotly figures (both wrapper and native classes) */
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
961 .plotly-center { display: flex; justify-content: center; }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
962 .plotly-center .plotly-graph-div, .plotly-center .js-plotly-plot { margin: 0 auto !important; }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
963 .js-plotly-plot, .plotly-graph-div { margin-left: auto !important; margin-right: auto !important; }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
964
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
965 /* Download button for predictions table */
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
966 .download-btn {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
967 padding: 8px 12px;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
968 border: 1px solid #4CAF50;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
969 background: #4CAF50;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
970 color: white;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
971 border-radius: 6px;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
972 cursor: pointer;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
973 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
974 .download-btn:hover { filter: brightness(0.95); }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
975 .preds-controls {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
976 display: flex;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
977 justify-content: flex-end;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
978 gap: 8px;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
979 margin: 8px 0;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
980 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
981 </style>
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
982 <script>
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
983 function tableToCSV(table){
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
984 const rows = Array.from(table.querySelectorAll('tr'));
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
985 return rows.map(row =>
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
986 Array.from(row.querySelectorAll('th,td')).map(cell => {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
987 let text = cell.innerText.replace(/\\r?\\n|\\r/g,' ').trim();
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
988 if (text.includes('"') || text.includes(',')) {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
989 text = '"' + text.replace(/"/g,'""') + '"';
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
990 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
991 return text;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
992 }).join(',')
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
993 ).join('\\n');
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
994 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
995 document.addEventListener('DOMContentLoaded', function(){
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
996 const btn = document.getElementById('downloadPredsCsv');
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
997 if(btn){
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
998 btn.addEventListener('click', function(){
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
999 const tbl = document.querySelector('.predictions-table');
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1000 if(!tbl){ alert('Predictions table not found.'); return; }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1001 const csv = tableToCSV(tbl);
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1002 const blob = new Blob([csv], {type: 'text/csv;charset=utf-8;'});
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1003 const url = URL.createObjectURL(blob);
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1004 const a = document.createElement('a');
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1005 a.href = url;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1006 a.download = 'ground_truth_vs_predictions.csv';
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1007 document.body.appendChild(a);
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1008 a.click();
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1009 document.body.removeChild(a);
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1010 URL.revokeObjectURL(url);
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1011 });
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1012 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1013 });
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1014 </script>
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1015 """
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1016 html += f"<h1>{title}</h1>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1017
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1018 def append_plot_blocks(tab_html: str, plots: List[Dict[str, str]], title_suffix: str = "") -> str:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1019 """Append Plotly blocks to a tab with consistent markup."""
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1020 if not plots:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1021 return tab_html
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1022 suffix = title_suffix or ""
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1023 for plot in plots:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1024 tab_html += (
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1025 f"<h2 style='text-align: center;'>{plot['title']}{suffix}</h2>"
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1026 f"<div class='plotly-center'>{plot['html']}</div>"
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1027 )
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1028 return tab_html
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1029
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1030 def build_dataset_overview(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1031 label_metadata: Optional[Path],
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1032 output_type: Optional[str],
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1033 split_probabilities: Optional[List[float]],
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1034 label_split_counts: Optional[List[Dict[str, int]]] = None,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1035 split_counts: Optional[Dict[int, int]] = None,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1036 fallback_dataset: Optional[Path] = None,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1037 ) -> str:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1038 """Summarize dataset distribution across splits using the actual split config."""
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1039 if label_split_counts:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1040 # Use the actual counts captured during data prep instead of heuristics.
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1041 return format_dataset_overview_table(label_split_counts, regression_mode=False)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1042
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1043 if output_type == "regression" and split_counts:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1044 rows = [
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1045 {"split": "train", "count": int(split_counts.get(0, 0))},
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1046 {"split": "validation", "count": int(split_counts.get(1, 0))},
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1047 {"split": "test", "count": int(split_counts.get(2, 0))},
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1048 ]
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1049 return format_dataset_overview_table(rows, regression_mode=True)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1050
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1051 candidate_paths: List[Path] = []
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1052 if label_metadata and label_metadata.exists():
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1053 candidate_paths.append(label_metadata)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1054 if fallback_dataset and fallback_dataset.exists():
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1055 candidate_paths.append(fallback_dataset)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1056 if not candidate_paths:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1057 return format_dataset_overview_table([])
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1058
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1059 def _normalize_split_probabilities(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1060 probs: Optional[List[float]],
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1061 ) -> Optional[List[float]]:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1062 if not probs or len(probs) != 3:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1063 return None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1064 try:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1065 probs = [float(p) for p in probs]
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1066 except (TypeError, ValueError):
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1067 return None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1068 total = sum(probs)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1069 if total <= 0:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1070 return None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1071 return [p / total for p in probs]
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1072
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1073 def _split_counts_from_column(df: pd.DataFrame) -> Dict[int, int]:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1074 if SPLIT_COLUMN_NAME not in df.columns:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1075 return {}
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1076 split_series = pd.to_numeric(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1077 df[SPLIT_COLUMN_NAME], errors="coerce"
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1078 ).dropna()
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1079 if split_series.empty:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1080 return {}
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1081 split_series = split_series.astype(int)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1082 return split_series.value_counts().to_dict()
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1083
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1084 def _split_counts_from_probs(total: int, probs: List[float]) -> Dict[int, int]:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1085 train_n = int(total * probs[0])
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1086 val_n = int(total * probs[1])
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1087 test_n = max(0, total - train_n - val_n)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1088 return {0: train_n, 1: val_n, 2: test_n}
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1089
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1090 fallback_rows: Optional[List[Dict[str, int]]] = None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1091 for meta_path in candidate_paths:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1092 try:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1093 df_labels = pd.read_csv(meta_path)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1094 probs = _normalize_split_probabilities(split_probabilities)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1095
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1096 # Regression (or missing label column): only need split counts
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1097 if output_type == "regression" or LABEL_COLUMN_NAME not in df_labels.columns:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1098 split_counts_found = _split_counts_from_column(df_labels)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1099 if split_counts_found:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1100 rows = [
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1101 {"split": "train", "count": int(split_counts_found.get(0, 0))},
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1102 {"split": "validation", "count": int(split_counts_found.get(1, 0))},
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1103 {"split": "test", "count": int(split_counts_found.get(2, 0))},
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1104 ]
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1105 return format_dataset_overview_table(rows, regression_mode=True)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1106 if probs and fallback_rows is None:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1107 split_counts_found = _split_counts_from_probs(len(df_labels), probs)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1108 fallback_rows = [
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1109 {"split": "train", "count": int(split_counts_found.get(0, 0))},
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1110 {"split": "validation", "count": int(split_counts_found.get(1, 0))},
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1111 {"split": "test", "count": int(split_counts_found.get(2, 0))},
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1112 ]
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1113 continue
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1114
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1115 # Classification: prefer actual split assignments; fall back to configured probabilities
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1116 if SPLIT_COLUMN_NAME in df_labels.columns:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1117 df_counts = df_labels[[LABEL_COLUMN_NAME, SPLIT_COLUMN_NAME]].copy()
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1118 df_counts[SPLIT_COLUMN_NAME] = pd.to_numeric(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1119 df_counts[SPLIT_COLUMN_NAME], errors="coerce"
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1120 )
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1121 df_counts = df_counts.dropna(subset=[SPLIT_COLUMN_NAME])
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1122 if df_counts.empty:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1123 continue
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1124
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1125 df_counts[SPLIT_COLUMN_NAME] = df_counts[SPLIT_COLUMN_NAME].astype(int)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1126 df_counts = df_counts.dropna(subset=[LABEL_COLUMN_NAME])
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1127 counts = (
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1128 df_counts.groupby([LABEL_COLUMN_NAME, SPLIT_COLUMN_NAME])
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1129 .size()
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1130 .unstack(fill_value=0)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1131 .sort_index()
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1132 )
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1133 rows = []
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1134 for lbl, row in counts.iterrows():
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1135 rows.append(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1136 {
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1137 "label": str(lbl),
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1138 "train": int(row.get(0, 0)),
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1139 "validation": int(row.get(1, 0)),
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1140 "test": int(row.get(2, 0)),
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1141 }
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1142 )
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1143 return format_dataset_overview_table(rows)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1144
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1145 if probs:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1146 label_series = df_labels[LABEL_COLUMN_NAME].dropna()
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1147 label_counts = label_series.value_counts().sort_index()
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1148 if label_counts.empty:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1149 continue
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1150 rows = []
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1151 for lbl, count in label_counts.items():
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1152 train_n = int(count * probs[0])
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1153 val_n = int(count * probs[1])
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1154 test_n = max(0, count - train_n - val_n)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1155 rows.append(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1156 {
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1157 "label": str(lbl),
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1158 "train": train_n,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1159 "validation": val_n,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1160 "test": test_n,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1161 }
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1162 )
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1163 fallback_rows = fallback_rows or rows
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1164 except Exception as exc:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1165 logger.warning("Failed to build dataset overview from %s: %s", meta_path, exc)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1166 continue
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1167
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1168 if fallback_rows:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1169 return format_dataset_overview_table(fallback_rows, regression_mode=output_type == "regression")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1170 return format_dataset_overview_table([])
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1171
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1172 metrics_html = ""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1173 train_val_metrics_html = ""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1174 test_metrics_html = ""
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1175 output_type = None
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1176 train_stats_path = exp_dir / "training_statistics.json"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1177 test_stats_path = exp_dir / TEST_STATISTICS_FILE_NAME
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1178 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1179 if train_stats_path.exists() and test_stats_path.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1180 with open(train_stats_path) as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1181 train_stats = json.load(f)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1182 with open(test_stats_path) as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1183 test_stats = json.load(f)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1184 output_type = detect_output_type(test_stats)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1185 metrics_html = format_stats_table_html(train_stats, test_stats, output_type)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1186 train_val_metrics_html = format_train_val_stats_table_html(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1187 train_stats, test_stats
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1188 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1189 test_metrics_html = format_test_merged_stats_table_html(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1190 extract_metrics_from_json(train_stats, test_stats, output_type)[
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1191 "test"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1192 ], output_type
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1193 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1194 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1195 logger.warning(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1196 f"Could not load stats for HTML report: {type(e).__name__}: {e}"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1197 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1198
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1199 if not output_type:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1200 # Fallback to configured task type when stats are unavailable (e.g., failed run).
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1201 output_type = (
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1202 str(config_for_summary.get("task_type")).lower()
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1203 if config_for_summary.get("task_type")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1204 else None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1205 )
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1206
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1207 dataset_overview_html = build_dataset_overview(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1208 label_metadata_path,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1209 output_type,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1210 config.get("split_probabilities"),
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1211 config.get("label_split_counts"),
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1212 config.get("split_counts"),
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1213 dataset_path_from_desc,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1214 )
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1215
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1216 config_html = ""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1217 training_progress = self.get_training_process(output_dir)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1218 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1219 config_html = format_config_table_html(
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1220 config_for_summary, split_info, training_progress, output_type
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1221 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1222 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1223 logger.warning(f"Could not load config for HTML report: {e}")
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1224 config_html = (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1225 "<h2 style='text-align: center;'>Model and Training Summary</h2>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1226 "<p style='text-align:center; color:#666;'>Configuration details unavailable.</p>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1227 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1228 if not config_html:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1229 config_html = (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1230 "<h2 style='text-align: center;'>Model and Training Summary</h2>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1231 "<p style='text-align:center; color:#666;'>No configuration details found.</p>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1232 )
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1233
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1234 # ---------- image rendering with exclusions ----------
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1235 def render_img_section(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1236 title: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1237 dir_path: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1238 output_type: str = None,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1239 exclude_names: Optional[set] = None,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1240 ) -> str:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1241 if not dir_path.exists():
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1242 return ""
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1243
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1244 exclude_names = exclude_names or set()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1245
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1246 # Search recursively because Ludwig can nest figures under per-feature folders
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1247 imgs = list(dir_path.rglob("*.png"))
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1248
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1249 # Exclude ROC curves and standard confusion matrices (keep only entropy version)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1250 default_exclude = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1251 # "roc_curves.png", # Remove ROC curves from test tab
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1252 "confusion_matrix__label_top5.png", # Remove standard confusion matrix
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1253 "confusion_matrix__label_top10.png", # Remove duplicate
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1254 "confusion_matrix__label_top6.png", # Remove duplicate
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1255 "confusion_matrix_entropy__label_top10.png", # Keep only top5
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1256 "confusion_matrix_entropy__label_top6.png", # Keep only top5
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1257 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1258 title_is_test = title.lower().startswith("test")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1259 if title_is_test and output_type == "binary":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1260 default_exclude.update(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1261 {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1262 "confusion_matrix__label_top2.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1263 "confusion_matrix_entropy__label_top2.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1264 "roc_curves_from_prediction_statistics.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1265 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1266 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1267 elif title_is_test and output_type == "category":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1268 default_exclude.update(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1269 {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1270 "compare_classifiers_multiclass_multimetric__label_best10.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1271 "compare_classifiers_multiclass_multimetric__label_sorted.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1272 "compare_classifiers_multiclass_multimetric__label_worst10.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1273 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1274 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1275
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1276 imgs = [
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1277 img
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1278 for img in imgs
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1279 if img.name not in default_exclude
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1280 and img.name not in exclude_names
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1281 and not (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1282 "learning_curves" in img.stem
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1283 and "loss" in img.stem
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1284 and "label" in img.stem
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1285 )
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1286 ]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1287
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1288 if not imgs:
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1289 return ""
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1290
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1291 # Sort images by name for consistent ordering (works with string and numeric labels)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1292 imgs = sorted(imgs, key=lambda x: x.name)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1293
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1294 html_section = ""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1295 custom_titles = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1296 "compare_classifiers_multiclass_multimetric__label_top10": "Metric Comparison by Label",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1297 "compare_classifiers_performance_from_prob": "Label Metric Comparison by Probability",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1298 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1299 for img in imgs:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1300 b64 = encode_image_to_base64(str(img))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1301 default_title = img.stem.replace("_", " ").title()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1302 img_title = custom_titles.get(img.stem, default_title)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1303 html_section += (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1304 f"<h2 style='text-align: center;'>{img_title}</h2>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1305 f'<div class="plot" style="margin-bottom:20px;text-align:center;">'
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1306 f'<img src="data:image/png;base64,{b64}" '
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1307 f'style="max-width:90%;max-height:600px;border:1px solid #ddd;" />'
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1308 f"</div>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1309 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1310 return html_section
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1311
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1312 # Show dataset overview, performance first, then config
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1313 predictions_csv_path = exp_dir / "predictions.csv"
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1314
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1315 tab1_content = dataset_overview_html + metrics_html + config_html
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1316
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1317 tab2_content = train_val_metrics_html
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1318 # Preload binary threshold plot so it appears first in Train/Val tab
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1319 threshold_plot = None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1320 threshold_value = (
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1321 config_for_summary.get("threshold")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1322 if config_for_summary.get("threshold") is not None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1323 else config.get("threshold")
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1324 )
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1325 if threshold_value is None and output_type == "binary":
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1326 threshold_value = 0.5
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1327 if output_type == "binary" and predictions_csv_path.exists():
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1328 try:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1329 threshold_plot = build_binary_threshold_plot(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1330 str(predictions_csv_path),
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1331 label_data_path=str(config.get("label_column_data_path"))
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1332 if config.get("label_column_data_path")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1333 else None,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1334 split_value=1,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1335 )
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1336 except Exception as e:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1337 logger.warning(f"Could not generate validation threshold plot: {e}")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1338
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1339 if train_stats_path.exists():
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1340 try:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1341 if output_type == "regression":
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1342 tv_plots = build_regression_train_val_plots(str(train_stats_path))
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1343 tab2_content = append_plot_blocks(tab2_content, tv_plots)
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1344 else:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1345 tv_plots = build_train_validation_plots(str(train_stats_path))
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1346 # Add threshold plot first, then other train/val plots
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1347 if threshold_plot:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1348 tab2_content = append_plot_blocks(tab2_content, [threshold_plot])
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1349 # Only append once; avoid duplicates if added elsewhere
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1350 threshold_plot = None
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1351 tab2_content = append_plot_blocks(tab2_content, tv_plots)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1352 if threshold_plot or tv_plots:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1353 logger.info(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1354 f"Added {len(tv_plots) + (1 if threshold_plot else 0)} train/val diagnostic plots"
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1355 )
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1356 except Exception as e:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1357 logger.warning(f"Could not generate train/val plots: {e}")
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1358
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1359 # Only include training PNGs for regression; classification is handled by filtered Plotly plots
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1360 if output_type == "regression":
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1361 tab2_content += render_img_section(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1362 "Training and Validation Visualizations",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1363 train_viz_dir,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1364 output_type,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1365 exclude_names={
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1366 "compare_classifiers_performance_from_prob.png",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1367 "roc_curves_from_prediction_statistics.png",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1368 "precision_recall_curves_from_prediction_statistics.png",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1369 "precision_recall_curve.png",
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1370 },
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1371 )
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1372
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1373 # Validation diagnostics (calibration/threshold) from predictions.csv, using split=1
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1374 if output_type in ("binary", "category") and predictions_csv_path.exists():
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1375 try:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1376 val_diag_plots = build_prediction_diagnostics(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1377 str(predictions_csv_path),
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1378 label_data_path=str(config.get("label_column_data_path"))
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1379 if config.get("label_column_data_path")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1380 else None,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1381 split_value=1,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1382 )
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1383 val_conf_plots = [
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1384 p for p in val_diag_plots if "Prediction Confidence Distribution" in p.get("title", "")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1385 ]
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1386 tab2_content = append_plot_blocks(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1387 tab2_content, val_conf_plots, " (Validation)"
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1388 )
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1389 except Exception as e:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1390 logger.warning(f"Could not generate validation diagnostics: {e}")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1391
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1392 # --- Predictions vs Ground Truth table (REGRESSION ONLY) ---
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1393 preds_section = ""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1394 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1395 if output_type == "regression" and parquet_path.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1396 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1397 # 1) load predictions from Parquet
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1398 df_preds = pd.read_parquet(parquet_path).reset_index(drop=True)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1399 # assume the column containing your model's prediction is named "prediction"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1400 # or contains that substring:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1401 pred_col = next(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1402 (c for c in df_preds.columns if "prediction" in c.lower()),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1403 None,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1404 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1405 if pred_col is None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1406 raise ValueError("No prediction column found in Parquet output")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1407 df_pred = df_preds[[pred_col]].rename(columns={pred_col: "prediction"})
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1408
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1409 # 2) load ground truth for the test split from prepared CSV
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1410 df_all = pd.read_csv(config["label_column_data_path"])
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1411 df_gt = df_all[df_all[SPLIT_COLUMN_NAME] == 2][
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1412 LABEL_COLUMN_NAME
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1413 ].reset_index(drop=True)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1414 # 3) concatenate side-by-side
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1415 df_table = pd.concat([df_gt, df_pred], axis=1)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1416 df_table.columns = [LABEL_COLUMN_NAME, "prediction"]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1417
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1418 # 4) render as HTML
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1419 preds_html = df_table.to_html(index=False, classes="predictions-table")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1420 preds_section = (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1421 "<h2 style='text-align: center;'>Ground Truth vs. Predictions</h2>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1422 "<div class='preds-controls'>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1423 "<button id='downloadPredsCsv' class='download-btn'>Download CSV</button>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1424 "</div>"
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1425 "<div class='scroll-rows-30' style='overflow-x:auto; overflow-y:auto; max-height:350px; margin-bottom:20px;'>"
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1426 + preds_html
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1427 + "</div>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1428 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1429 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1430 logger.warning(f"Could not build Predictions vs GT table: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1431
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1432 tab3_content = test_metrics_html + preds_section
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1433
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1434 if output_type == "regression" and train_stats_path.exists():
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1435 try:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1436 test_plots = build_regression_test_plots(str(train_stats_path))
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1437 tab3_content = append_plot_blocks(tab3_content, test_plots)
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1438 if test_plots:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1439 logger.info(f"Generated {len(test_plots)} regression test plots")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1440 except Exception as e:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1441 logger.warning(f"Could not generate regression test plots: {e}")
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1442
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1443 if output_type in ("binary", "category") and test_stats_path.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1444 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1445 interactive_plots = build_classification_plots(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1446 str(test_stats_path),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1447 str(train_stats_path) if train_stats_path.exists() else None,
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1448 metadata_csv_path=str(label_metadata_path)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1449 if label_metadata_path and label_metadata_path.exists()
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1450 else None,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1451 train_set_metadata_path=str(train_set_metadata_path)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1452 if train_set_metadata_path.exists()
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1453 else None,
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1454 threshold=threshold_value,
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1455 )
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1456 tab3_content = append_plot_blocks(tab3_content, interactive_plots)
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1457 if interactive_plots:
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1458 logger.info(f"Generated {len(interactive_plots)} interactive Plotly plots")
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1459 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1460 logger.warning(f"Could not generate Plotly plots: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1461
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1462 # Multi-class transparency plots from test stats (replace ROC/PR for multi-class)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1463 if output_type == "category" and test_stats_path.exists():
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1464 try:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1465 multi_curves = build_multiclass_metric_plots(str(test_stats_path))
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1466 tab3_content = append_plot_blocks(tab3_content, multi_curves)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1467 if multi_curves:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1468 logger.info("Added multi-class per-class metric plots to test tab")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1469 except Exception as e:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1470 logger.warning(f"Could not generate multi-class metric plots: {e}")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1471
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1472 # Test diagnostics (confidence histogram) from predictions.csv, using split=2
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1473 if predictions_csv_path.exists():
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1474 try:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1475 test_diag_plots = build_prediction_diagnostics(
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1476 str(predictions_csv_path),
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1477 label_data_path=str(config.get("label_column_data_path"))
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1478 if config.get("label_column_data_path")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1479 else None,
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1480 split_value=2,
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1481 )
17
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1482 test_conf_plots = [
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1483 p for p in test_diag_plots if "Prediction Confidence Distribution" in p.get("title", "")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1484 ]
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1485 if test_conf_plots:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1486 tab3_content = append_plot_blocks(tab3_content, test_conf_plots)
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1487 logger.info("Added test prediction confidence plot")
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1488 except Exception as e:
db9be962dc13 planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents: 16
diff changeset
1489 logger.warning(f"Could not generate test diagnostics: {e}")
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1490
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1491 # Add static TEST PNGs (with default dedupe/exclusions)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1492 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1493 modal_html = get_metrics_help_modal()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1494 html += tabbed_html + modal_html + get_html_closing()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1495
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1496 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1497 with open(report_path, "w") as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1498 f.write(html)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1499 logger.info(f"HTML report generated at: {report_path}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1500 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1501 logger.error(f"Failed to write HTML report: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1502 raise
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1503
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1504 return report_path