Mercurial > repos > goeckslab > image_learner
annotate ludwig_backend.py @ 17:db9be962dc13 draft default tip
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
| author | goeckslab |
|---|---|
| date | Wed, 10 Dec 2025 00:24:13 +0000 |
| parents | 8729f69e9207 |
| children |
| rev | line source |
|---|---|
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1 import inspect |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
2 import json |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
3 import logging |
|
16
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
4 import os |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
5 from pathlib import Path |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
6 from typing import Any, Dict, List, Optional, Protocol, Tuple |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
7 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
8 import pandas as pd |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
9 import pandas.api.types as ptypes |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
10 import yaml |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
11 from constants import ( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
12 IMAGE_PATH_COLUMN_NAME, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
13 LABEL_COLUMN_NAME, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
14 MODEL_ENCODER_TEMPLATES, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
15 SPLIT_COLUMN_NAME, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
16 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
17 from html_structure import ( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
18 build_tabbed_html, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
19 encode_image_to_base64, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
20 format_config_table_html, |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
21 format_dataset_overview_table, |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
22 format_stats_table_html, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
23 format_test_merged_stats_table_html, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
24 format_train_val_stats_table_html, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
25 get_html_closing, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
26 get_html_template, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
27 get_metrics_help_modal, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
28 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
29 from ludwig.globals import ( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
30 DESCRIPTION_FILE_NAME, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
31 PREDICTIONS_PARQUET_FILE_NAME, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
32 TEST_STATISTICS_FILE_NAME, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
33 TRAIN_SET_METADATA_FILE_NAME, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
34 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
35 from ludwig.utils.data_utils import get_split_path |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
36 from metaformer_setup import get_visualizations_registry, META_DEFAULT_CFGS |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
37 from plotly_plots import ( |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
38 build_binary_threshold_plot, |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
39 build_classification_plots, |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
40 build_multiclass_metric_plots, |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
41 build_prediction_diagnostics, |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
42 build_regression_test_plots, |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
43 build_regression_train_val_plots, |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
44 build_train_validation_plots, |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
45 ) |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
46 from utils import detect_output_type, extract_metrics_from_json |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
47 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
# Module-wide logger. The fixed name "ImageLearner" (rather than __name__)
# means every module that requests this name shares one logger.
logger = logging.getLogger("ImageLearner")
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
49 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
50 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
class Backend(Protocol):
    """Interface for a machine learning backend.

    This is a structural (duck-typed) interface: any class that provides the
    four methods below with compatible signatures satisfies it, without
    having to inherit from ``Backend``.
    """

    def prepare_config(
        self,
        config_params: Dict[str, Any],
        split_config: Dict[str, Any],
    ) -> str:
        """Build the backend configuration and return it as a string.

        Args:
            config_params: User-supplied training/model options.
            split_config: Options describing how the dataset is split.

        Returns:
            The serialized configuration.
            NOTE(review): presumably a YAML document -- confirm against the
            concrete backend.
        """
        ...

    def run_experiment(
        self,
        dataset_path: Path,
        config_path: Path,
        output_dir: Path,
        random_seed: int,
    ) -> None:
        """Run an experiment for the given dataset and configuration file.

        Args:
            dataset_path: Location of the dataset to use.
            config_path: Location of the configuration file.
            output_dir: Directory that receives the experiment outputs.
            random_seed: Seed passed to the backend for the run.
        """
        ...

    def generate_plots(self, output_dir: Path) -> None:
        """Generate plots for an experiment stored under ``output_dir``."""
        ...

    def generate_html_report(
        self,
        title: str,
        output_dir: str,
        config: Dict[str, Any],
        split_info: str,
    ) -> Path:
        """Generate an HTML report and return the path of the written file.

        Args:
            title: Title of the report.
            output_dir: Directory holding the experiment outputs.
                NOTE(review): annotated as ``str`` here while the other
                methods take ``Path`` -- confirm which one implementers
                and callers actually pass.
            config: Configuration values to present in the report.
            split_info: Text describing the dataset split.

        Returns:
            Path of the generated report.
        """
        ...
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
81 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
82 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
83 class LudwigDirectBackend: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
84 """Backend for running Ludwig experiments directly via the internal experiment_cli function.""" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
85 |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
86 _torchvision_patched = False |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
87 |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
def _detect_image_dimensions(self, image_zip_path: str) -> Tuple[int, int]:
    """Detect image dimensions from the first readable image in the dataset.

    Entries of the zip archive whose names end in .png/.jpg/.jpeg are tried
    in archive order until one can be opened.

    Args:
        image_zip_path: Path to the zip archive holding the images. A falsy
            value (``None`` or ``""``) means no archive was provided.

    Returns:
        ``(height, width)`` of the first readable image, or ``(224, 224)``
        when no archive is given, no usable image is found, or any error
        occurs. This method never raises.
    """
    default_dims = (224, 224)
    image_suffixes = ('.png', '.jpg', '.jpeg')

    # Check if image_zip is provided (done first so the fallback does not
    # depend on the optional imports below).
    if not image_zip_path:
        logger.warning("No image zip provided, using default 224x224")
        return default_dims

    try:
        import io
        import zipfile

        from PIL import Image

        with zipfile.ZipFile(image_zip_path, 'r') as z:
            image_files = []
            for name in z.namelist():
                if not name.lower().endswith(image_suffixes):
                    continue
                parts = name.split('/')
                # Archives created on macOS carry "__MACOSX/._<name>"
                # resource-fork stubs that have an image extension but are
                # not images; hidden dot-files are skipped for the same
                # reason.
                if '__MACOSX' in parts or parts[-1].startswith('.'):
                    continue
                image_files.append(name)

            if not image_files:
                logger.warning("No image files found in zip, using default 224x224")
                return default_dims

            for name in image_files:
                try:
                    with z.open(name) as f:
                        # Image.open is lazy: the size comes from the header.
                        # The context manager releases the image handle.
                        with Image.open(io.BytesIO(f.read())) as img:
                            width, height = img.size
                except (OSError, ValueError, zipfile.BadZipFile) as e:
                    # One unreadable entry should not force the default
                    # when later entries are valid.
                    logger.debug(f"Skipping unreadable image {name}: {e}")
                    continue
                logger.info(f"Detected image dimensions: {width}x{height}")
                return height, width  # Return as (height, width) to match encoder config

        logger.warning("No readable image found in zip, using default 224x224")
        return default_dims

    except Exception as e:
        # Deliberate best-effort: a missing Pillow install, a bad archive or
        # any other failure must not abort configuration, so fall back.
        logger.warning(f"Error detecting image dimensions: {e}, using default 224x224")
        return default_dims
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
117 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
118 def prepare_config( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
119 self, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
120 config_params: Dict[str, Any], |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
121 split_config: Dict[str, Any], |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
122 ) -> str: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
123 logger.info("LudwigDirectBackend: Preparing YAML configuration.") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
124 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
125 model_name = config_params.get("model_name", "resnet18") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
126 use_pretrained = config_params.get("use_pretrained", False) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
127 fine_tune = config_params.get("fine_tune", False) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
128 if use_pretrained: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
129 trainable = bool(fine_tune) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
130 else: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
131 trainable = True |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
132 epochs = config_params.get("epochs", 10) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
133 batch_size = config_params.get("batch_size") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
134 num_processes = config_params.get("preprocessing_num_processes", 1) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
135 early_stop = config_params.get("early_stop", None) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
136 learning_rate = config_params.get("learning_rate") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
137 learning_rate = "auto" if learning_rate is None else float(learning_rate) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
138 raw_encoder = MODEL_ENCODER_TEMPLATES.get(model_name, model_name) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
139 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
140 # --- MetaFormer detection and config logic --- |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
141 def _is_metaformer(name: str) -> bool: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
142 return isinstance(name, str) and name.startswith( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
143 ( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
144 "identityformer_", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
145 "randformer_", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
146 "poolformerv2_", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
147 "convformer_", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
148 "caformer_", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
149 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
150 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
151 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
152 # Check if this is a MetaFormer model (either direct name or in custom_model) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
153 is_metaformer = ( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
154 _is_metaformer(model_name) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
155 or (isinstance(raw_encoder, dict) and "custom_model" in raw_encoder and _is_metaformer(raw_encoder["custom_model"])) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
156 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
157 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
158 metaformer_resize: Optional[Tuple[int, int]] = None |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
159 metaformer_channels = 3 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
160 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
161 if is_metaformer: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
162 # Handle MetaFormer models |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
163 custom_model = None |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
164 if isinstance(raw_encoder, dict) and "custom_model" in raw_encoder: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
165 custom_model = raw_encoder["custom_model"] |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
166 else: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
167 custom_model = model_name |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
168 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
169 logger.info(f"DETECTED MetaFormer model: {custom_model}") |
|
16
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
170 # Stash the model name for patched Stacked2DCNN in case Ludwig drops custom_model from kwargs |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
171 try: |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
172 from MetaFormer.metaformer_stacked_cnn import set_current_metaformer_model |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
173 |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
174 set_current_metaformer_model(custom_model) |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
175 except Exception: |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
176 logger.debug("Could not set current MetaFormer model hint; proceeding without global override") |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
177 # Also pass via environment to survive process boundaries (e.g., Ray workers) |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
178 os.environ["GLEAM_META_FORMER_MODEL"] = custom_model |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
179 cfg_channels, cfg_height, cfg_width = 3, 224, 224 |
|
16
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
180 model_cfg = {} |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
181 if META_DEFAULT_CFGS: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
182 model_cfg = META_DEFAULT_CFGS.get(custom_model, {}) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
183 input_size = model_cfg.get("input_size") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
184 if isinstance(input_size, (list, tuple)) and len(input_size) == 3: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
185 cfg_channels, cfg_height, cfg_width = ( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
186 int(input_size[0]), |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
187 int(input_size[1]), |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
188 int(input_size[2]), |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
189 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
190 |
|
16
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
191 weights_url = None |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
192 if isinstance(model_cfg, dict): |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
193 weights_url = model_cfg.get("url") |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
194 logger.info( |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
195 "MetaFormer cfg lookup: model=%s has_cfg=%s url=%s use_pretrained=%s", |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
196 custom_model, |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
197 bool(model_cfg), |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
198 weights_url, |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
199 use_pretrained, |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
200 ) |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
201 if use_pretrained and not weights_url: |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
202 logger.warning( |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
203 "MetaFormer pretrained requested for %s but no URL found in default cfgs; model will be randomly initialized", |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
204 custom_model, |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
205 ) |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
206 |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
207 resize_value = config_params.get("image_resize") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
208 if resize_value and resize_value != "original": |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
209 try: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
210 dimensions = resize_value.split("x") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
211 if len(dimensions) == 2: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
212 target_height, target_width = int(dimensions[0]), int(dimensions[1]) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
213 if target_height <= 0 or target_width <= 0: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
214 raise ValueError( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
215 f"Image resize must be positive integers, received {resize_value}." |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
216 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
217 logger.info(f"MetaFormer explicit resize: {target_height}x{target_width}") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
218 else: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
219 raise ValueError(resize_value) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
220 except (ValueError, IndexError): |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
221 logger.warning( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
222 "Invalid image resize format '%s'; falling back to model default %sx%s", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
223 resize_value, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
224 cfg_height, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
225 cfg_width, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
226 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
227 target_height, target_width = cfg_height, cfg_width |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
228 else: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
229 image_zip_path = config_params.get("image_zip", "") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
230 detected_height, detected_width = self._detect_image_dimensions(image_zip_path) |
|
16
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
231 target_height, target_width = detected_height, detected_width |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
232 if use_pretrained and (detected_height, detected_width) != (cfg_height, cfg_width): |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
233 logger.info( |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
234 "MetaFormer pretrained weights expect %sx%s; proceeding with detected %sx%s", |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
235 cfg_height, |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
236 cfg_width, |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
237 detected_height, |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
238 detected_width, |
|
8729f69e9207
planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents:
15
diff
changeset
|
239 ) |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
240 if target_height <= 0 or target_width <= 0: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
241 raise ValueError( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
242 f"Invalid detected image dimensions for MetaFormer: {target_height}x{target_width}." |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
243 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
244 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
245 metaformer_channels = cfg_channels |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
246 metaformer_resize = (target_height, target_width) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
247 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
248 encoder_config = { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
249 "type": "stacked_cnn", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
250 "height": target_height, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
251 "width": target_width, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
252 "num_channels": metaformer_channels, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
253 "output_size": 128, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
254 "use_pretrained": use_pretrained, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
255 "trainable": trainable, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
256 "custom_model": custom_model, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
257 } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
258 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
259 elif isinstance(raw_encoder, dict): |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
260 # Handle image resize for regular encoders |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
261 # Note: Standard encoders like ResNet don't support height/width parameters |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
262 # Resize will be handled at the preprocessing level by Ludwig |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
263 if config_params.get("image_resize") and config_params["image_resize"] != "original": |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
264 logger.info(f"Resize requested: {config_params['image_resize']} for standard encoder. Resize will be handled at preprocessing level.") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
265 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
266 encoder_config = { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
267 **raw_encoder, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
268 "use_pretrained": use_pretrained, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
269 "trainable": trainable, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
270 } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
271 else: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
272 encoder_config = {"type": raw_encoder} |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
273 |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
274 # Set a human-friendly architecture string for reporting |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
275 arch_display = None |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
276 if is_metaformer and custom_model: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
277 arch_display = str(custom_model) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
278 elif isinstance(raw_encoder, dict): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
279 enc_type = raw_encoder.get("type") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
280 enc_variant = raw_encoder.get("model_variant") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
281 if enc_type: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
282 base = str(enc_type).replace("_", " ").title() |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
283 arch_display = f"{base} {enc_variant}" if enc_variant is not None else base |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
284 else: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
285 arch_display = str(raw_encoder).replace("_", " ").title() |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
286 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
287 if not arch_display: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
288 arch_display = str(model_name) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
289 config_params["architecture"] = arch_display |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
290 |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
291 batch_size_cfg = batch_size or "auto" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
292 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
293 label_column_path = config_params.get("label_column_data_path") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
294 label_series = None |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
295 label_metadata_hint = config_params.get("label_metadata") or {} |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
296 output_type_hint = config_params.get("output_type_hint") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
297 num_unique_labels = int(label_metadata_hint.get("num_unique", 2)) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
298 numeric_binary_labels = bool(label_metadata_hint.get("is_numeric_binary", False)) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
299 likely_regression = bool(label_metadata_hint.get("likely_regression", False)) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
300 if label_column_path is not None and Path(label_column_path).exists(): |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
301 try: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
302 label_series = pd.read_csv(label_column_path)[LABEL_COLUMN_NAME] |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
303 non_na = label_series.dropna() |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
304 if not non_na.empty: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
305 num_unique_labels = non_na.nunique() |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
306 is_numeric = ptypes.is_numeric_dtype(label_series.dtype) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
307 numeric_binary_labels = is_numeric and num_unique_labels == 2 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
308 likely_regression = ( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
309 is_numeric and not numeric_binary_labels and num_unique_labels > 10 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
310 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
311 if numeric_binary_labels: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
312 logger.info( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
313 "Detected numeric binary labels in '%s'; configuring Ludwig for binary classification.", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
314 LABEL_COLUMN_NAME, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
315 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
316 except Exception as e: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
317 logger.warning(f"Could not read label column for task detection: {e}") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
318 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
319 if output_type_hint == "binary": |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
320 num_unique_labels = 2 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
321 numeric_binary_labels = numeric_binary_labels or bool( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
322 label_metadata_hint.get("is_numeric", False) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
323 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
324 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
325 if numeric_binary_labels: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
326 task_type = "classification" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
327 elif likely_regression: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
328 task_type = "regression" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
329 else: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
330 task_type = "classification" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
331 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
332 if task_type == "regression" and numeric_binary_labels: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
333 logger.warning( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
334 "Numeric binary labels detected but regression task chosen; forcing classification to avoid invalid Ludwig config." |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
335 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
336 task_type = "classification" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
337 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
338 config_params["task_type"] = task_type |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
339 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
340 image_feat: Dict[str, Any] = { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
341 "name": IMAGE_PATH_COLUMN_NAME, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
342 "type": "image", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
343 } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
344 # Set preprocessing dimensions FIRST for MetaFormer models |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
345 if is_metaformer: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
346 if metaformer_resize is None: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
347 metaformer_resize = (224, 224) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
348 height, width = metaformer_resize |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
349 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
350 # CRITICAL: Set preprocessing dimensions FIRST for MetaFormer models |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
351 # This is essential for MetaFormer models to work properly |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
352 if "preprocessing" not in image_feat: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
353 image_feat["preprocessing"] = {} |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
354 image_feat["preprocessing"]["height"] = height |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
355 image_feat["preprocessing"]["width"] = width |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
356 # Use infer_image_dimensions=True to allow Ludwig to read images for validation |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
357 # but set explicit max dimensions to control the output size |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
358 image_feat["preprocessing"]["infer_image_dimensions"] = True |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
359 image_feat["preprocessing"]["infer_image_max_height"] = height |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
360 image_feat["preprocessing"]["infer_image_max_width"] = width |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
361 image_feat["preprocessing"]["num_channels"] = metaformer_channels |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
362 image_feat["preprocessing"]["resize_method"] = "interpolate" # Use interpolation for better quality |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
363 image_feat["preprocessing"]["standardize_image"] = "imagenet1k" # Use ImageNet standardization |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
364 # Force Ludwig to respect our dimensions by setting additional parameters |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
365 image_feat["preprocessing"]["requires_equal_dimensions"] = False |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
366 logger.info(f"Set preprocessing dimensions for MetaFormer: {height}x{width} (infer_dimensions=True with max dimensions to allow validation)") |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
367 config_params["image_size"] = f"{height}x{width}" |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
368 # Now set the encoder configuration |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
369 image_feat["encoder"] = encoder_config |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
370 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
371 if config_params.get("augmentation") is not None: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
372 image_feat["augmentation"] = config_params["augmentation"] |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
373 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
374 # Add resize configuration for standard encoders (ResNet, etc.) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
375 # FIXED: MetaFormer models now respect user dimensions completely |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
376 # Previously there was a double resize issue where MetaFormer would force 224x224 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
377 # Now both MetaFormer and standard encoders respect user's resize choice |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
378 if (not is_metaformer) and config_params.get("image_resize") and config_params["image_resize"] != "original": |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
379 try: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
380 dimensions = config_params["image_resize"].split("x") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
381 if len(dimensions) == 2: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
382 height, width = int(dimensions[0]), int(dimensions[1]) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
383 if height <= 0 or width <= 0: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
384 raise ValueError( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
385 f"Image resize must be positive integers, received {config_params['image_resize']}." |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
386 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
387 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
388 # Add resize to preprocessing for standard encoders |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
389 if "preprocessing" not in image_feat: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
390 image_feat["preprocessing"] = {} |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
391 image_feat["preprocessing"]["height"] = height |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
392 image_feat["preprocessing"]["width"] = width |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
393 # Use infer_image_dimensions=True to allow Ludwig to read images for validation |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
394 # but set explicit max dimensions to control the output size |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
395 image_feat["preprocessing"]["infer_image_dimensions"] = True |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
396 image_feat["preprocessing"]["infer_image_max_height"] = height |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
397 image_feat["preprocessing"]["infer_image_max_width"] = width |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
398 logger.info(f"Added resize preprocessing: {height}x{width} for standard encoder with infer_image_dimensions=True and max dimensions") |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
399 config_params["image_size"] = f"{height}x{width}" |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
400 except (ValueError, IndexError): |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
401 logger.warning(f"Invalid image resize format: {config_params['image_resize']}, skipping resize preprocessing") |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
402 elif not is_metaformer: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
403 # No explicit resize provided; keep for reporting purposes |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
404 config_params.setdefault("image_size", "original") |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
405 |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
406 def _resolve_validation_metric(task: str, requested: Optional[str]) -> Optional[str]: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
407 """Pick a validation metric that Ludwig will accept for the resolved task.""" |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
408 default_map = { |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
409 "regression": "pearson_r", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
410 "binary": "roc_auc", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
411 "category": "accuracy", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
412 } |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
413 allowed_map = { |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
414 "regression": { |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
415 "pearson_r", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
416 "mean_absolute_error", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
417 "mean_squared_error", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
418 "root_mean_squared_error", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
419 "mean_absolute_percentage_error", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
420 "r2", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
421 "explained_variance", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
422 "loss", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
423 }, |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
424 # Ludwig rejects f1 and balanced_accuracy for binary outputs; keep to known-safe set. |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
425 "binary": { |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
426 "roc_auc", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
427 "accuracy", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
428 "precision", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
429 "recall", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
430 "specificity", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
431 "log_loss", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
432 "loss", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
433 }, |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
434 "category": { |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
435 "accuracy", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
436 "balanced_accuracy", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
437 "precision", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
438 "recall", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
439 "f1", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
440 "specificity", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
441 "log_loss", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
442 "loss", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
443 }, |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
444 } |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
445 alias_map = { |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
446 "regression": { |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
447 "mae": "mean_absolute_error", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
448 "mse": "mean_squared_error", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
449 "rmse": "root_mean_squared_error", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
450 "mape": "mean_absolute_percentage_error", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
451 }, |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
452 } |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
453 |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
454 default_metric = default_map.get(task) |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
455 allowed = allowed_map.get(task, set()) |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
456 metric = requested or default_metric |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
457 |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
458 if metric is None: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
459 return None |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
460 |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
461 metric = alias_map.get(task, {}).get(metric, metric) |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
462 |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
463 if metric not in allowed: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
464 if requested: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
465 logger.warning( |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
466 f"Validation metric '{requested}' is not supported for {task} outputs; using '{default_metric}' instead." |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
467 ) |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
468 metric = default_metric |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
469 return metric |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
470 |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
471 if task_type == "regression": |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
472 output_feat = { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
473 "name": LABEL_COLUMN_NAME, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
474 "type": "number", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
475 "decoder": {"type": "regressor"}, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
476 "loss": {"type": "mean_squared_error"}, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
477 } |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
478 val_metric = _resolve_validation_metric("regression", config_params.get("validation_metric")) |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
479 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
480 else: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
481 if num_unique_labels == 2: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
482 output_feat = { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
483 "name": LABEL_COLUMN_NAME, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
484 "type": "binary", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
485 "loss": {"type": "binary_weighted_cross_entropy"}, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
486 } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
487 if config_params.get("threshold") is not None: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
488 output_feat["threshold"] = float(config_params["threshold"]) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
489 else: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
490 output_feat = { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
491 "name": LABEL_COLUMN_NAME, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
492 "type": "category", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
493 "loss": {"type": "softmax_cross_entropy"}, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
494 } |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
495 val_metric = _resolve_validation_metric( |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
496 "binary" if num_unique_labels == 2 else "category", |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
497 config_params.get("validation_metric"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
498 ) |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
499 |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
500 # Propagate the resolved validation metric (including any task-based fallback or alias normalization) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
501 config_params["validation_metric"] = val_metric |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
502 |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
503 conf: Dict[str, Any] = { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
504 "model_type": "ecd", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
505 "input_features": [image_feat], |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
506 "output_features": [output_feat], |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
507 "combiner": {"type": "concat"}, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
508 "trainer": { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
509 "epochs": epochs, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
510 "early_stop": early_stop, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
511 "batch_size": batch_size_cfg, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
512 "learning_rate": learning_rate, |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
513 # set validation_metric when provided |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
514 **({"validation_metric": val_metric} if val_metric else {}), |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
515 }, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
516 "preprocessing": { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
517 "split": split_config, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
518 "num_processes": num_processes, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
519 "in_memory": False, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
520 }, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
521 } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
522 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
523 logger.debug("LudwigDirectBackend: Config dict built.") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
524 try: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
525 yaml_str = yaml.dump(conf, sort_keys=False, indent=2) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
526 logger.info("LudwigDirectBackend: YAML config generated.") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
527 return yaml_str |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
528 except Exception: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
529 logger.error( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
530 "LudwigDirectBackend: Failed to serialize YAML.", |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
531 exc_info=True, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
532 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
533 raise |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
534 |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
def _patch_torchvision_download(self) -> None:
    """Disable hash verification for ``torch.hub`` weight downloads.

    Torchvision weight downloads sometimes fail checksum validation behind
    corporate proxies that rewrite binaries. Skip hash checking to allow
    pre-trained weights to load in those environments.

    The patch rebinds ``torch.hub.load_state_dict_from_url`` and
    ``torch.hub.download_url_to_file`` to wrappers that never forward a hash
    request to the original functions. Because module attributes are
    replaced, the change affects every later lookup of those attributes in
    the current process, not just this backend instance. The class-level
    flag ``LudwigDirectBackend._torchvision_patched`` ensures the wrappers
    are installed only once.

    SECURITY NOTE: with hash checking disabled, a corrupted or tampered
    weight file is no longer rejected at download time. This is a deliberate
    trade-off for proxied environments.

    Returns:
        None. Failures (for example ``torch`` not being importable) are
        logged as a warning and otherwise ignored, since the patch is
        best-effort.
    """
    if LudwigDirectBackend._torchvision_patched:
        return
    try:
        import torch.hub as torch_hub

        original = torch_hub.load_state_dict_from_url
        original_download = torch_hub.download_url_to_file

        def _no_hash(
            url,
            model_dir=None,
            map_location=None,
            progress=True,
            check_hash=False,
            file_name=None,
            **kwargs,
        ):
            # ``check_hash`` is accepted for signature compatibility but is
            # always overridden with False. ``**kwargs`` forwards keywords
            # added by newer torch releases (e.g. ``weights_only``) so that
            # callers using them do not hit a TypeError in this wrapper.
            return original(
                url,
                model_dir=model_dir,
                map_location=map_location,
                progress=progress,
                check_hash=False,
                file_name=file_name,
                **kwargs,
            )

        def _download_no_hash(url, dst, hash_prefix=None, progress=True, **kwargs):
            # Torchvision's download_url_to_file signature does not accept check_hash in older versions.
            # Dropping ``hash_prefix`` is what actually disables verification here.
            return original_download(url, dst, hash_prefix=None, progress=progress, **kwargs)

        torch_hub.load_state_dict_from_url = _no_hash  # type: ignore[assignment]
        torch_hub.download_url_to_file = _download_no_hash  # type: ignore[assignment]
        LudwigDirectBackend._torchvision_patched = True
        logger.info("Disabled torchvision weight hash verification to avoid proxy-corrupted downloads.")
    except Exception as exc:
        # Best-effort: training can still proceed with the unpatched functions.
        logger.warning("Could not patch torchvision download hash check: %s", exc)
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
569 |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
def run_experiment(
    self,
    dataset_path: Path,
    config_path: Path,
    output_dir: Path,
    random_seed: int = 42,
) -> None:
    """Run a Ludwig experiment through ``ludwig.experiment.experiment_cli``.

    Args:
        dataset_path: Location of the dataset; passed to Ludwig as a string.
        config_path: Location of the Ludwig config; passed as a string.
        output_dir: Directory for experiment results. It is created
            (including parents) if it does not exist yet.
        random_seed: Seed forwarded to Ludwig.

    Raises:
        RuntimeError: If ``experiment_cli`` cannot be imported, or if the
            call raises ``TypeError`` (reported as an argument mismatch).
        Exception: Any other error raised by the experiment is logged and
            re-raised unchanged.
    """
    logger.info("LudwigDirectBackend: Starting experiment execution.")

    # Avoid strict hash validation for torchvision weights (common in proxied environments)
    self._patch_torchvision_download()

    # Ludwig is imported lazily so an import failure surfaces here as a
    # RuntimeError instead of at module import time.
    try:
        from ludwig.experiment import experiment_cli
    except ImportError as import_err:
        logger.error(
            "LudwigDirectBackend: Could not import experiment_cli.",
            exc_info=True,
        )
        raise RuntimeError("Ludwig import failed.") from import_err

    output_dir.mkdir(parents=True, exist_ok=True)

    try:
        cli_kwargs = {
            "dataset": str(dataset_path),
            "config": str(config_path),
            "output_directory": str(output_dir),
            "random_seed": random_seed,
            "skip_preprocessing": True,
        }
        experiment_cli(**cli_kwargs)
        logger.info(
            f"LudwigDirectBackend: Experiment completed. Results in {output_dir}"
        )
    except TypeError as arg_err:
        logger.error(
            "LudwigDirectBackend: Argument mismatch in experiment_cli call.",
            exc_info=True,
        )
        raise RuntimeError("Ludwig argument error.") from arg_err
    except Exception:
        logger.error(
            "LudwigDirectBackend: Experiment execution error.",
            exc_info=True,
        )
        raise
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
617 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
def get_training_process(self, output_dir) -> Optional[Dict[str, Any]]:
    """Return training-progress details recorded by the most recent Ludwig run.

    The newest ``experiment_run*`` entry under ``output_dir`` (ordered by
    modification time) is selected and its ``model/training_progress.json``
    file is parsed.

    Args:
        output_dir: Directory that holds the ``experiment_run*`` outputs.
            Anything accepted by ``pathlib.Path`` works.

    Returns:
        A dict with the keys ``learning_rate``, ``batch_size`` and ``epoch``
        (each ``None`` when missing from the file); ``None`` when there is
        no run directory or no progress file; an empty dict when the file
        cannot be read/parsed or does not hold a JSON object.
    """
    output_dir = Path(output_dir)
    exp_dirs = sorted(
        output_dir.glob("experiment_run*"),
        key=lambda p: p.stat().st_mtime,
    )

    if not exp_dirs:
        logger.warning(f"No experiment run directories found in {output_dir}")
        return None

    # Sorted ascending by mtime, so the last entry is the latest run.
    progress_file = exp_dirs[-1] / "model" / "training_progress.json"
    if not progress_file.exists():
        logger.warning(f"No training_progress.json found in {progress_file}")
        return None

    try:
        with progress_file.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        # Best effort: progress info is optional, so never fail the caller.
        logger.warning(f"Failed to read training progress info: {e}")
        return {}

    # Valid JSON need not be an object; reject other payloads explicitly
    # instead of relying on an AttributeError from ``.get``.
    if not isinstance(data, dict):
        logger.warning(
            "Failed to read training progress info: "
            f"expected a JSON object, got {type(data).__name__}"
        )
        return {}

    return {
        key: data.get(key)
        for key in ("learning_rate", "batch_size", "epoch")
    }
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
646 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
def convert_parquet_to_csv(self, output_dir: Path) -> None:
    """Convert the latest run's predictions Parquet file to ``predictions.csv``.

    The newest ``experiment_run*`` entry under ``output_dir`` (ordered by
    modification time) is selected. If it contains the file named by
    ``PREDICTIONS_PARQUET_FILE_NAME``, that file is loaded with pandas and
    written next to it as ``predictions.csv`` without the index column.

    The conversion is best effort: a missing run directory or Parquet file
    is logged and skipped, and any error raised while converting is logged
    (with traceback) rather than propagated.

    Args:
        output_dir: Directory that holds the ``experiment_run*`` outputs.
            Anything accepted by ``pathlib.Path`` works.
    """
    output_dir = Path(output_dir)
    exp_dirs = sorted(
        output_dir.glob("experiment_run*"),
        key=lambda p: p.stat().st_mtime,
    )
    if not exp_dirs:
        logger.warning(f"No experiment run dirs found in {output_dir}")
        return

    # Sorted ascending by mtime, so the last entry is the latest run.
    exp_dir = exp_dirs[-1]
    parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
    csv_path = exp_dir / "predictions.csv"

    # Check if parquet file exists before trying to convert
    if not parquet_path.exists():
        logger.info(f"Predictions parquet file not found at {parquet_path}, skipping conversion")
        return

    try:
        df = pd.read_parquet(parquet_path)
        df.to_csv(csv_path, index=False)
        logger.info(f"Converted Parquet to CSV: {csv_path}")
    except Exception as e:
        # Deliberately non-fatal, but keep the traceback so failures can be
        # diagnosed from the log.
        logger.error(f"Error converting Parquet to CSV: {e}", exc_info=True)
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
672 |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
@staticmethod
def _extract_metric_series(stats: Dict[str, Any], split: str, prefer: Optional[str] = None) -> Tuple[Optional[str], Optional[List[float]]]:
    """Select one metric series for ``split`` from a nested statistics dict.

    ``stats[split]`` is expected to map feature names (plus an optional
    ``"combined"`` entry) to dicts of ``metric name -> list of values``.
    A metric qualifies when its value is a list holding at least one
    ``int`` or ``float``. Candidates are collected from ``"combined"``
    first and then from the remaining entries in dict order.

    Args:
        stats: Statistics mapping keyed by split name.
        split: Key of the split to look up in ``stats``.
        prefer: Optional metric name; the first candidate with this name
            is returned when one exists.

    Returns:
        ``(metric_name, values)`` for the preferred metric if found,
        otherwise for the first candidate; ``(None, None)`` when ``stats``
        is not a dict or no candidate exists.
    """
    if not isinstance(stats, dict):
        return None, None

    def _numeric_series(metric_map: Dict[str, Any]) -> List[Tuple[str, List[float]]]:
        # Keep only list-valued metrics that contain at least one number.
        return [
            (name, series)
            for name, series in metric_map.items()
            if isinstance(series, list)
            and any(isinstance(item, (int, float)) for item in series)
        ]

    candidates: List[Tuple[str, List[float]]] = []
    per_split = stats.get(split, {})

    if isinstance(per_split, dict):
        # "combined" metrics take precedence over per-feature metrics.
        aggregate = per_split.get("combined")
        if isinstance(aggregate, dict):
            candidates.extend(_numeric_series(aggregate))

        for feature, metric_map in per_split.items():
            if feature != "combined" and isinstance(metric_map, dict):
                candidates.extend(_numeric_series(metric_map))

    if not candidates:
        return None, None

    if prefer:
        for entry in candidates:
            if entry[0] == prefer:
                return entry

    return candidates[0]
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
703 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
704 def generate_plots(self, output_dir: Path) -> None: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
705 """Generate Ludwig visualizations (train/val + test) for the latest experiment run.""" |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
706 logger.info("Generating Ludwig visualizations (train/val + test)…") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
707 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
708 # Train/validation visualizations |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
709 train_plots = { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
710 "learning_curves", |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
711 } |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
712 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
713 # Test visualizations (multi-class transparency) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
714 test_plots = { |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
715 "confusion_matrix", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
716 "compare_performance", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
717 "compare_classifiers_multiclass_multimetric", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
718 "frequency_vs_f1", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
719 "confidence_thresholding", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
720 "confidence_thresholding_data_vs_acc", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
721 "confidence_thresholding_data_vs_acc_subset", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
722 "confidence_thresholding_data_vs_acc_subset_per_class", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
723 # Binary-only visualizations will still be attempted; multi-class replacements handled elsewhere |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
724 "binary_threshold_vs_metric", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
725 "roc_curves", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
726 "precision_recall_curves", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
727 "calibration_1_vs_all", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
728 "calibration_multiclass", |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
729 } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
730 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
731 output_dir = Path(output_dir) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
732 exp_dirs = sorted( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
733 output_dir.glob("experiment_run*"), |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
734 key=lambda p: p.stat().st_mtime, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
735 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
736 if not exp_dirs: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
737 logger.warning(f"No experiment run dirs found in {output_dir}") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
738 return |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
739 exp_dir = exp_dirs[-1] |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
740 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
741 viz_dir = exp_dir / "visualizations" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
742 viz_dir.mkdir(exist_ok=True) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
743 train_viz = viz_dir / "train" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
744 test_viz = viz_dir / "test" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
745 train_viz.mkdir(parents=True, exist_ok=True) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
746 test_viz.mkdir(parents=True, exist_ok=True) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
747 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
748 def _check(p: Path) -> Optional[str]: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
749 return str(p) if p.exists() else None |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
750 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
751 training_stats = _check(exp_dir / "training_statistics.json") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
752 test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
753 gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
754 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
755 dataset_path = None |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
756 split_file = None |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
757 desc = exp_dir / DESCRIPTION_FILE_NAME |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
758 if desc.exists(): |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
759 with open(desc, "r") as f: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
760 cfg = json.load(f) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
761 dataset_path = _check(Path(cfg.get("dataset", ""))) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
762 split_file = _check(Path(get_split_path(cfg.get("dataset", "")))) |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
763 model_name = cfg.get("model_name", "model") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
764 else: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
765 model_name = "model" |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
766 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
767 output_feature = "" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
768 if desc.exists(): |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
769 try: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
770 output_feature = cfg["config"]["output_features"][0]["name"] |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
771 except Exception: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
772 pass |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
773 if not output_feature and test_stats: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
774 with open(test_stats, "r") as f: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
775 stats = json.load(f) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
776 output_feature = next(iter(stats.keys()), "") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
777 |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
778 probs_path = None |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
779 prob_candidates = [ |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
780 exp_dir / f"{LABEL_COLUMN_NAME}_probabilities.csv", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
781 exp_dir / f"{output_feature}_probabilities.csv" if output_feature else None, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
782 exp_dir / "probabilities.csv", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
783 exp_dir / "predictions.csv", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
784 exp_dir / PREDICTIONS_PARQUET_FILE_NAME, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
785 ] |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
786 for cand in prob_candidates: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
787 if cand and Path(cand).exists(): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
788 probs_path = str(cand) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
789 break |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
790 |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
791 viz_registry = get_visualizations_registry() |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
792 if not viz_registry: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
793 logger.warning( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
794 "Ludwig visualizations registry not available; train/test PNGs will be skipped." |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
795 ) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
796 return |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
797 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
798 base_kwargs = { |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
799 "training_statistics": [training_stats] if training_stats else [], |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
800 "test_statistics": [test_stats] if test_stats else [], |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
801 "probabilities": [probs_path] if probs_path else [], |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
802 "output_feature_name": output_feature, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
803 "ground_truth_split": 2, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
804 "top_n_classes": [20], |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
805 "top_k": 3, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
806 "metrics": ["f1", "precision", "recall", "accuracy"], |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
807 "positive_label": 0, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
808 "ground_truth_metadata": gt_metadata, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
809 "ground_truth": dataset_path, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
810 "split_file": split_file, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
811 "output_directory": None, # set per plot below |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
812 "normalize": False, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
813 "file_format": "png", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
814 "model_names": [model_name], |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
815 } |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
816 for viz_name, viz_func in viz_registry.items(): |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
817 if viz_name in train_plots: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
818 viz_dir_plot = train_viz |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
819 elif viz_name in test_plots: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
820 viz_dir_plot = test_viz |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
821 else: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
822 continue |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
823 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
824 try: |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
825 # Build per-viz kwargs based on the function signature to avoid unexpected args |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
826 sig_params = set(inspect.signature(viz_func).parameters.keys()) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
827 call_kwargs = { |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
828 k: v |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
829 for k, v in base_kwargs.items() |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
830 if k in sig_params and v is not None |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
831 } |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
832 if "output_directory" in sig_params: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
833 call_kwargs["output_directory"] = str(viz_dir_plot) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
834 |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
835 viz_func( |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
836 **call_kwargs, |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
837 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
838 logger.info(f"✔ Generated {viz_name}") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
839 except Exception as e: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
840 logger.warning(f"✘ Skipped {viz_name}: {e}") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
841 logger.info(f"All visualizations written to {viz_dir}") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
842 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
843 def generate_html_report( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
844 self, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
845 title: str, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
846 output_dir: str, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
847 config: dict, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
848 split_info: str, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
849 ) -> Path: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
850 """Assemble an HTML report from visualizations under train_val/ and test/ folders.""" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
851 cwd = Path.cwd() |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
852 report_name = title.lower().replace(" ", "_") + "_report.html" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
853 report_path = cwd / report_name |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
854 output_dir = Path(output_dir) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
855 output_type = None |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
856 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
857 exp_dirs = sorted( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
858 output_dir.glob("experiment_run*"), |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
859 key=lambda p: p.stat().st_mtime, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
860 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
861 if not exp_dirs: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
862 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
863 exp_dir = exp_dirs[-1] |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
864 train_set_metadata_path = exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
865 label_metadata_path = config.get("label_column_data_path") |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
866 if label_metadata_path: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
867 label_metadata_path = Path(label_metadata_path) |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
868 dataset_path_from_desc: Optional[Path] = None |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
869 |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
870 # Pull additional config details from description.json if available |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
871 config_for_summary = dict(config) |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
872 if "target_column" not in config_for_summary or not config_for_summary.get("target_column"): |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
873 config_for_summary["target_column"] = LABEL_COLUMN_NAME |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
874 desc_path = exp_dir / DESCRIPTION_FILE_NAME |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
875 if desc_path.exists(): |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
876 try: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
877 with open(desc_path, "r") as f: |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
878 desc_json = json.load(f) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
879 desc_cfg = desc_json.get("config", {}) if isinstance(desc_json, dict) else {} |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
880 encoder_cfg = ( |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
881 desc_cfg.get("input_features", [{}])[0].get("encoder", {}) |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
882 if isinstance(desc_cfg.get("input_features", [{}]), list) |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
883 else {} |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
884 ) |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
885 output_cfg = ( |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
886 desc_cfg.get("output_features", [{}])[0] |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
887 if isinstance(desc_cfg.get("output_features", [{}]), list) |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
888 else {} |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
889 ) |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
890 trainer_cfg = desc_cfg.get("trainer", {}) if isinstance(desc_cfg, dict) else {} |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
891 loss_cfg = output_cfg.get("loss", {}) if isinstance(output_cfg, dict) else {} |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
892 opt_cfg = trainer_cfg.get("optimizer", {}) if isinstance(trainer_cfg, dict) else {} |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
893 clip_cfg = trainer_cfg.get("gradient_clipping", {}) if isinstance(trainer_cfg, dict) else {} |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
894 |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
895 arch_type = encoder_cfg.get("type") |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
896 arch_variant = encoder_cfg.get("model_variant") |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
897 arch_custom = encoder_cfg.get("custom_model") |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
898 arch_name = None |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
899 if arch_custom: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
900 arch_name = str(arch_custom) |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
901 if arch_type: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
902 arch_base = str(arch_type).replace("_", " ").title() |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
903 arch_type_name = ( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
904 f"{arch_base} {arch_variant}" if arch_variant is not None else arch_base |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
905 ) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
906 # Prefer explicit custom model names (e.g., MetaFormer) but fall back to encoder type |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
907 arch_name = arch_name or arch_type_name |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
908 if not arch_name and config.get("model_name"): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
909 # As a last resort, show the user-selected model name (handles custom/MetaFormer cases) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
910 arch_name = str(config.get("model_name")) |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
911 |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
912 summary_fields = { |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
913 "architecture": arch_name, |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
914 "model_variant": arch_variant, |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
915 "pretrained": encoder_cfg.get("use_pretrained"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
916 "trainable": encoder_cfg.get("trainable"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
917 "target_column": output_cfg.get("column"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
918 "task_type": output_cfg.get("type"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
919 "validation_metric": trainer_cfg.get("validation_metric"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
920 "loss_function": loss_cfg.get("type"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
921 "threshold": output_cfg.get("threshold"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
922 "total_epochs": trainer_cfg.get("epochs"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
923 "early_stop": trainer_cfg.get("early_stop"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
924 "batch_size": trainer_cfg.get("batch_size"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
925 "optimizer": opt_cfg.get("type"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
926 "learning_rate": trainer_cfg.get("learning_rate"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
927 "random_seed": desc_cfg.get("random_seed") or config.get("random_seed"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
928 "use_mixed_precision": trainer_cfg.get("use_mixed_precision"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
929 "gradient_clipping": clip_cfg.get("clipglobalnorm"), |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
930 } |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
931 for k, v in summary_fields.items(): |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
932 if v is None: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
933 continue |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
934 # Do not override user-passed target/image column names in config |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
935 if k in {"target_column", "image_column"} and config_for_summary.get(k): |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
936 continue |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
937 config_for_summary.setdefault(k, v) |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
938 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
939 dataset_field = None |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
940 if isinstance(desc_json, dict): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
941 dataset_field = desc_json.get("dataset") or desc_cfg.get("dataset") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
942 if dataset_field: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
943 try: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
944 dataset_path_from_desc = Path(dataset_field) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
945 except TypeError: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
946 dataset_path_from_desc = None |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
947 if dataset_path_from_desc and (not label_metadata_path or not label_metadata_path.exists()): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
948 label_metadata_path = dataset_path_from_desc |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
949 except Exception as e: # pragma: no cover - defensive |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
950 logger.warning(f"Could not merge description.json into config summary: {e}") |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
951 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
952 base_viz_dir = exp_dir / "visualizations" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
953 train_viz_dir = base_viz_dir / "train" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
954 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
955 html = get_html_template() |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
956 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
957 # Extra CSS & JS: center Plotly and enable CSV download for predictions table |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
958 html += """ |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
959 <style> |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
960 /* Center Plotly figures (both wrapper and native classes) */ |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
961 .plotly-center { display: flex; justify-content: center; } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
962 .plotly-center .plotly-graph-div, .plotly-center .js-plotly-plot { margin: 0 auto !important; } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
963 .js-plotly-plot, .plotly-graph-div { margin-left: auto !important; margin-right: auto !important; } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
964 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
965 /* Download button for predictions table */ |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
966 .download-btn { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
967 padding: 8px 12px; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
968 border: 1px solid #4CAF50; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
969 background: #4CAF50; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
970 color: white; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
971 border-radius: 6px; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
972 cursor: pointer; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
973 } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
974 .download-btn:hover { filter: brightness(0.95); } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
975 .preds-controls { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
976 display: flex; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
977 justify-content: flex-end; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
978 gap: 8px; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
979 margin: 8px 0; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
980 } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
981 </style> |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
982 <script> |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
983 function tableToCSV(table){ |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
984 const rows = Array.from(table.querySelectorAll('tr')); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
985 return rows.map(row => |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
986 Array.from(row.querySelectorAll('th,td')).map(cell => { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
987 let text = cell.innerText.replace(/\\r?\\n|\\r/g,' ').trim(); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
988 if (text.includes('"') || text.includes(',')) { |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
989 text = '"' + text.replace(/"/g,'""') + '"'; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
990 } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
991 return text; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
992 }).join(',') |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
993 ).join('\\n'); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
994 } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
995 document.addEventListener('DOMContentLoaded', function(){ |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
996 const btn = document.getElementById('downloadPredsCsv'); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
997 if(btn){ |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
998 btn.addEventListener('click', function(){ |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
999 const tbl = document.querySelector('.predictions-table'); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1000 if(!tbl){ alert('Predictions table not found.'); return; } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1001 const csv = tableToCSV(tbl); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1002 const blob = new Blob([csv], {type: 'text/csv;charset=utf-8;'}); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1003 const url = URL.createObjectURL(blob); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1004 const a = document.createElement('a'); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1005 a.href = url; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1006 a.download = 'ground_truth_vs_predictions.csv'; |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1007 document.body.appendChild(a); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1008 a.click(); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1009 document.body.removeChild(a); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1010 URL.revokeObjectURL(url); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1011 }); |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1012 } |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1013 }); |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1014 </script> |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1015 """ |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1016 html += f"<h1>{title}</h1>" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1017 |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
def append_plot_blocks(tab_html: str, plots: List[Dict[str, str]], title_suffix: str = "") -> str:
    """Append a centered heading and Plotly container for each plot to a tab.

    Args:
        tab_html: HTML accumulated so far for the tab.
        plots: Plot descriptors; each one is read via its ``"title"`` and
            ``"html"`` keys.
        title_suffix: Text appended to every plot title. A falsy value
            (empty string or ``None``) adds nothing.

    Returns:
        ``tab_html`` followed by the markup for every plot. When ``plots`` is
        empty or ``None`` the input string is returned unchanged.

    Raises:
        KeyError: If a plot descriptor lacks ``"title"`` or ``"html"``.
    """
    if not plots:
        return tab_html
    suffix = title_suffix or ""
    # Render every fragment first and join once, instead of re-allocating the
    # whole tab string with ``+=`` on each iteration.
    blocks = [
        f"<h2 style='text-align: center;'>{plot['title']}{suffix}</h2>"
        f"<div class='plotly-center'>{plot['html']}</div>"
        for plot in plots
    ]
    return tab_html + "".join(blocks)
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1029 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1030 def build_dataset_overview( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1031 label_metadata: Optional[Path], |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1032 output_type: Optional[str], |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1033 split_probabilities: Optional[List[float]], |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1034 label_split_counts: Optional[List[Dict[str, int]]] = None, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1035 split_counts: Optional[Dict[int, int]] = None, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1036 fallback_dataset: Optional[Path] = None, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1037 ) -> str: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1038 """Summarize dataset distribution across splits using the actual split config.""" |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1039 if label_split_counts: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1040 # Use the actual counts captured during data prep instead of heuristics. |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1041 return format_dataset_overview_table(label_split_counts, regression_mode=False) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1042 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1043 if output_type == "regression" and split_counts: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1044 rows = [ |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1045 {"split": "train", "count": int(split_counts.get(0, 0))}, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1046 {"split": "validation", "count": int(split_counts.get(1, 0))}, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1047 {"split": "test", "count": int(split_counts.get(2, 0))}, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1048 ] |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1049 return format_dataset_overview_table(rows, regression_mode=True) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1050 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1051 candidate_paths: List[Path] = [] |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1052 if label_metadata and label_metadata.exists(): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1053 candidate_paths.append(label_metadata) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1054 if fallback_dataset and fallback_dataset.exists(): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1055 candidate_paths.append(fallback_dataset) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1056 if not candidate_paths: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1057 return format_dataset_overview_table([]) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1058 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1059 def _normalize_split_probabilities( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1060 probs: Optional[List[float]], |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1061 ) -> Optional[List[float]]: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1062 if not probs or len(probs) != 3: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1063 return None |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1064 try: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1065 probs = [float(p) for p in probs] |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1066 except (TypeError, ValueError): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1067 return None |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1068 total = sum(probs) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1069 if total <= 0: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1070 return None |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1071 return [p / total for p in probs] |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1072 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
def _split_counts_from_column(df: pd.DataFrame) -> Dict[int, int]:
    """Count rows per split id stored in the ``SPLIT_COLUMN_NAME`` column.

    Values that cannot be parsed as numbers are dropped before counting; the
    remaining values are cast to ``int``. Returns an empty dict when the
    column is absent or holds no numeric values.
    """
    if SPLIT_COLUMN_NAME in df.columns:
        numeric_ids = pd.to_numeric(df[SPLIT_COLUMN_NAME], errors="coerce")
        valid_ids = numeric_ids.dropna()
        if not valid_ids.empty:
            return valid_ids.astype(int).value_counts().to_dict()
    return {}
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1083 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1084 def _split_counts_from_probs(total: int, probs: List[float]) -> Dict[int, int]: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1085 train_n = int(total * probs[0]) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1086 val_n = int(total * probs[1]) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1087 test_n = max(0, total - train_n - val_n) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1088 return {0: train_n, 1: val_n, 2: test_n} |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1089 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1090 fallback_rows: Optional[List[Dict[str, int]]] = None |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1091 for meta_path in candidate_paths: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1092 try: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1093 df_labels = pd.read_csv(meta_path) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1094 probs = _normalize_split_probabilities(split_probabilities) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1095 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1096 # Regression (or missing label column): only need split counts |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1097 if output_type == "regression" or LABEL_COLUMN_NAME not in df_labels.columns: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1098 split_counts_found = _split_counts_from_column(df_labels) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1099 if split_counts_found: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1100 rows = [ |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1101 {"split": "train", "count": int(split_counts_found.get(0, 0))}, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1102 {"split": "validation", "count": int(split_counts_found.get(1, 0))}, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1103 {"split": "test", "count": int(split_counts_found.get(2, 0))}, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1104 ] |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1105 return format_dataset_overview_table(rows, regression_mode=True) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1106 if probs and fallback_rows is None: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1107 split_counts_found = _split_counts_from_probs(len(df_labels), probs) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1108 fallback_rows = [ |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1109 {"split": "train", "count": int(split_counts_found.get(0, 0))}, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1110 {"split": "validation", "count": int(split_counts_found.get(1, 0))}, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1111 {"split": "test", "count": int(split_counts_found.get(2, 0))}, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1112 ] |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1113 continue |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1114 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1115 # Classification: prefer actual split assignments; fall back to configured probabilities |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1116 if SPLIT_COLUMN_NAME in df_labels.columns: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1117 df_counts = df_labels[[LABEL_COLUMN_NAME, SPLIT_COLUMN_NAME]].copy() |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1118 df_counts[SPLIT_COLUMN_NAME] = pd.to_numeric( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1119 df_counts[SPLIT_COLUMN_NAME], errors="coerce" |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1120 ) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1121 df_counts = df_counts.dropna(subset=[SPLIT_COLUMN_NAME]) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1122 if df_counts.empty: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1123 continue |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1124 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1125 df_counts[SPLIT_COLUMN_NAME] = df_counts[SPLIT_COLUMN_NAME].astype(int) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1126 df_counts = df_counts.dropna(subset=[LABEL_COLUMN_NAME]) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1127 counts = ( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1128 df_counts.groupby([LABEL_COLUMN_NAME, SPLIT_COLUMN_NAME]) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1129 .size() |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1130 .unstack(fill_value=0) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1131 .sort_index() |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1132 ) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1133 rows = [] |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1134 for lbl, row in counts.iterrows(): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1135 rows.append( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1136 { |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1137 "label": str(lbl), |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1138 "train": int(row.get(0, 0)), |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1139 "validation": int(row.get(1, 0)), |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1140 "test": int(row.get(2, 0)), |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1141 } |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1142 ) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1143 return format_dataset_overview_table(rows) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1144 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1145 if probs: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1146 label_series = df_labels[LABEL_COLUMN_NAME].dropna() |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1147 label_counts = label_series.value_counts().sort_index() |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1148 if label_counts.empty: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1149 continue |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1150 rows = [] |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1151 for lbl, count in label_counts.items(): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1152 train_n = int(count * probs[0]) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1153 val_n = int(count * probs[1]) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1154 test_n = max(0, count - train_n - val_n) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1155 rows.append( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1156 { |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1157 "label": str(lbl), |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1158 "train": train_n, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1159 "validation": val_n, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1160 "test": test_n, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1161 } |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1162 ) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1163 fallback_rows = fallback_rows or rows |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1164 except Exception as exc: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1165 logger.warning("Failed to build dataset overview from %s: %s", meta_path, exc) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1166 continue |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1167 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1168 if fallback_rows: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1169 return format_dataset_overview_table(fallback_rows, regression_mode=output_type == "regression") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1170 return format_dataset_overview_table([]) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1171 |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1172 metrics_html = "" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1173 train_val_metrics_html = "" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1174 test_metrics_html = "" |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1175 output_type = None |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1176 train_stats_path = exp_dir / "training_statistics.json" |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1177 test_stats_path = exp_dir / TEST_STATISTICS_FILE_NAME |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1178 try: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1179 if train_stats_path.exists() and test_stats_path.exists(): |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1180 with open(train_stats_path) as f: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1181 train_stats = json.load(f) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1182 with open(test_stats_path) as f: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1183 test_stats = json.load(f) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1184 output_type = detect_output_type(test_stats) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1185 metrics_html = format_stats_table_html(train_stats, test_stats, output_type) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1186 train_val_metrics_html = format_train_val_stats_table_html( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1187 train_stats, test_stats |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1188 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1189 test_metrics_html = format_test_merged_stats_table_html( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1190 extract_metrics_from_json(train_stats, test_stats, output_type)[ |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1191 "test" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1192 ], output_type |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1193 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1194 except Exception as e: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1195 logger.warning( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1196 f"Could not load stats for HTML report: {type(e).__name__}: {e}" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1197 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1198 |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1199 if not output_type: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1200 # Fallback to configured task type when stats are unavailable (e.g., failed run). |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1201 output_type = ( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1202 str(config_for_summary.get("task_type")).lower() |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1203 if config_for_summary.get("task_type") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1204 else None |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1205 ) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1206 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1207 dataset_overview_html = build_dataset_overview( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1208 label_metadata_path, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1209 output_type, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1210 config.get("split_probabilities"), |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1211 config.get("label_split_counts"), |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1212 config.get("split_counts"), |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1213 dataset_path_from_desc, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1214 ) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1215 |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1216 config_html = "" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1217 training_progress = self.get_training_process(output_dir) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1218 try: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1219 config_html = format_config_table_html( |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1220 config_for_summary, split_info, training_progress, output_type |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1221 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1222 except Exception as e: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1223 logger.warning(f"Could not load config for HTML report: {e}") |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1224 config_html = ( |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1225 "<h2 style='text-align: center;'>Model and Training Summary</h2>" |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1226 "<p style='text-align:center; color:#666;'>Configuration details unavailable.</p>" |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1227 ) |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1228 if not config_html: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1229 config_html = ( |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1230 "<h2 style='text-align: center;'>Model and Training Summary</h2>" |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1231 "<p style='text-align:center; color:#666;'>No configuration details found.</p>" |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1232 ) |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1233 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1234 # ---------- image rendering with exclusions ---------- |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
def render_img_section(
    title: str,
    dir_path: Path,
    output_type: Optional[str] = None,
    exclude_names: Optional[set] = None,
) -> str:
    """Render the PNG figures found under ``dir_path`` as inline HTML.

    Every ``*.png`` below ``dir_path`` (searched recursively) that survives
    the exclusion rules is emitted as an ``<h2>`` heading followed by a
    ``<div class="plot">`` holding an ``<img>`` with a ``data:`` URI.

    Args:
        title: Section title. It is not written to the output; it is only
            inspected to decide whether this is a test section (a title
            starting with "test", case-insensitive), which enables extra
            per-``output_type`` exclusions.
        dir_path: Directory to search for figures.
        output_type: Output feature type; ``"binary"`` and ``"category"``
            add extra exclusions for test sections. Other values (including
            ``None``) add none.
        exclude_names: Additional file names (with extension) to skip.

    Returns:
        The concatenated HTML for all rendered figures, or ``""`` when the
        directory does not exist or no figure is left after filtering.
    """
    if not dir_path.exists():
        return ""

    # Redundant confusion-matrix variants that are never rendered.
    # ROC curves are deliberately NOT part of the default exclusions; they
    # are only dropped for binary test sections below.
    excluded = {
        "confusion_matrix__label_top5.png",
        "confusion_matrix__label_top10.png",
        "confusion_matrix__label_top6.png",
        "confusion_matrix_entropy__label_top10.png",
        "confusion_matrix_entropy__label_top6.png",
    }
    if title.lower().startswith("test"):
        if output_type == "binary":
            excluded.update(
                {
                    "confusion_matrix__label_top2.png",
                    "confusion_matrix_entropy__label_top2.png",
                    "roc_curves_from_prediction_statistics.png",
                }
            )
        elif output_type == "category":
            excluded.update(
                {
                    "compare_classifiers_multiclass_multimetric__label_best10.png",
                    "compare_classifiers_multiclass_multimetric__label_sorted.png",
                    "compare_classifiers_multiclass_multimetric__label_worst10.png",
                }
            )
    if exclude_names:
        excluded.update(exclude_names)

    # Search recursively because Ludwig can nest figures under per-feature folders.
    imgs = [
        img
        for img in dir_path.rglob("*.png")
        if img.name not in excluded
        # Label loss learning curves are skipped as well.
        and not (
            "learning_curves" in img.stem
            and "loss" in img.stem
            and "label" in img.stem
        )
    ]
    if not imgs:
        return ""

    # Sort by file name; the full path breaks ties so that equally named
    # figures from different sub-folders do not depend on traversal order.
    imgs.sort(key=lambda p: (p.name, str(p)))

    custom_titles = {
        "compare_classifiers_multiclass_multimetric__label_top10": "Metric Comparison by Label",
        "compare_classifiers_performance_from_prob": "Label Metric Comparison by Probability",
    }
    blocks = []
    for img in imgs:
        # NOTE(review): encode_image_to_base64 is defined elsewhere in this
        # file; presumably it returns the file's base64 text - confirm.
        b64 = encode_image_to_base64(str(img))
        img_title = custom_titles.get(
            img.stem, img.stem.replace("_", " ").title()
        )
        blocks.append(
            f"<h2 style='text-align: center;'>{img_title}</h2>"
            '<div class="plot" style="margin-bottom:20px;text-align:center;">'
            f'<img src="data:image/png;base64,{b64}" '
            'style="max-width:90%;max-height:600px;border:1px solid #ddd;" />'
            "</div>"
        )
    return "".join(blocks)
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1311 |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1312 # Show dataset overview, performance first, then config |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1313 predictions_csv_path = exp_dir / "predictions.csv" |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1314 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1315 tab1_content = dataset_overview_html + metrics_html + config_html |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1316 |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1317 tab2_content = train_val_metrics_html |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1318 # Preload binary threshold plot so it appears first in Train/Val tab |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1319 threshold_plot = None |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1320 threshold_value = ( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1321 config_for_summary.get("threshold") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1322 if config_for_summary.get("threshold") is not None |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1323 else config.get("threshold") |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1324 ) |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1325 if threshold_value is None and output_type == "binary": |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1326 threshold_value = 0.5 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1327 if output_type == "binary" and predictions_csv_path.exists(): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1328 try: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1329 threshold_plot = build_binary_threshold_plot( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1330 str(predictions_csv_path), |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1331 label_data_path=str(config.get("label_column_data_path")) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1332 if config.get("label_column_data_path") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1333 else None, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1334 split_value=1, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1335 ) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1336 except Exception as e: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1337 logger.warning(f"Could not generate validation threshold plot: {e}") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1338 |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1339 if train_stats_path.exists(): |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1340 try: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1341 if output_type == "regression": |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1342 tv_plots = build_regression_train_val_plots(str(train_stats_path)) |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1343 tab2_content = append_plot_blocks(tab2_content, tv_plots) |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1344 else: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1345 tv_plots = build_train_validation_plots(str(train_stats_path)) |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1346 # Add threshold plot first, then other train/val plots |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1347 if threshold_plot: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1348 tab2_content = append_plot_blocks(tab2_content, [threshold_plot]) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1349 # Only append once; avoid duplicates if added elsewhere |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1350 threshold_plot = None |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1351 tab2_content = append_plot_blocks(tab2_content, tv_plots) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1352 if threshold_plot or tv_plots: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1353 logger.info( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1354 f"Added {len(tv_plots) + (1 if threshold_plot else 0)} train/val diagnostic plots" |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1355 ) |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1356 except Exception as e: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1357 logger.warning(f"Could not generate train/val plots: {e}") |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1358 |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1359 # Only include training PNGs for regression; classification is handled by filtered Plotly plots |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1360 if output_type == "regression": |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1361 tab2_content += render_img_section( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1362 "Training and Validation Visualizations", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1363 train_viz_dir, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1364 output_type, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1365 exclude_names={ |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1366 "compare_classifiers_performance_from_prob.png", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1367 "roc_curves_from_prediction_statistics.png", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1368 "precision_recall_curves_from_prediction_statistics.png", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1369 "precision_recall_curve.png", |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1370 }, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1371 ) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1372 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1373 # Validation diagnostics (calibration/threshold) from predictions.csv, using split=1 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1374 if output_type in ("binary", "category") and predictions_csv_path.exists(): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1375 try: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1376 val_diag_plots = build_prediction_diagnostics( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1377 str(predictions_csv_path), |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1378 label_data_path=str(config.get("label_column_data_path")) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1379 if config.get("label_column_data_path") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1380 else None, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1381 split_value=1, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1382 ) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1383 val_conf_plots = [ |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1384 p for p in val_diag_plots if "Prediction Confidence Distribution" in p.get("title", "") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1385 ] |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1386 tab2_content = append_plot_blocks( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1387 tab2_content, val_conf_plots, " (Validation)" |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1388 ) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1389 except Exception as e: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1390 logger.warning(f"Could not generate validation diagnostics: {e}") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1391 |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1392 # --- Predictions vs Ground Truth table (REGRESSION ONLY) --- |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1393 preds_section = "" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1394 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1395 if output_type == "regression" and parquet_path.exists(): |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1396 try: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1397 # 1) load predictions from Parquet |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1398 df_preds = pd.read_parquet(parquet_path).reset_index(drop=True) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1399 # assume the column containing your model's prediction is named "prediction" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1400 # or contains that substring: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1401 pred_col = next( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1402 (c for c in df_preds.columns if "prediction" in c.lower()), |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1403 None, |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1404 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1405 if pred_col is None: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1406 raise ValueError("No prediction column found in Parquet output") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1407 df_pred = df_preds[[pred_col]].rename(columns={pred_col: "prediction"}) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1408 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1409 # 2) load ground truth for the test split from prepared CSV |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1410 df_all = pd.read_csv(config["label_column_data_path"]) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1411 df_gt = df_all[df_all[SPLIT_COLUMN_NAME] == 2][ |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1412 LABEL_COLUMN_NAME |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1413 ].reset_index(drop=True) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1414 # 3) concatenate side-by-side |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1415 df_table = pd.concat([df_gt, df_pred], axis=1) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1416 df_table.columns = [LABEL_COLUMN_NAME, "prediction"] |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1417 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1418 # 4) render as HTML |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1419 preds_html = df_table.to_html(index=False, classes="predictions-table") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1420 preds_section = ( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1421 "<h2 style='text-align: center;'>Ground Truth vs. Predictions</h2>" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1422 "<div class='preds-controls'>" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1423 "<button id='downloadPredsCsv' class='download-btn'>Download CSV</button>" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1424 "</div>" |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1425 "<div class='scroll-rows-30' style='overflow-x:auto; overflow-y:auto; max-height:350px; margin-bottom:20px;'>" |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1426 + preds_html |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1427 + "</div>" |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1428 ) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1429 except Exception as e: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1430 logger.warning(f"Could not build Predictions vs GT table: {e}") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1431 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1432 tab3_content = test_metrics_html + preds_section |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1433 |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1434 if output_type == "regression" and train_stats_path.exists(): |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1435 try: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1436 test_plots = build_regression_test_plots(str(train_stats_path)) |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1437 tab3_content = append_plot_blocks(tab3_content, test_plots) |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1438 if test_plots: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1439 logger.info(f"Generated {len(test_plots)} regression test plots") |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1440 except Exception as e: |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1441 logger.warning(f"Could not generate regression test plots: {e}") |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1442 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1443 if output_type in ("binary", "category") and test_stats_path.exists(): |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1444 try: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1445 interactive_plots = build_classification_plots( |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1446 str(test_stats_path), |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1447 str(train_stats_path) if train_stats_path.exists() else None, |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1448 metadata_csv_path=str(label_metadata_path) |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1449 if label_metadata_path and label_metadata_path.exists() |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1450 else None, |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1451 train_set_metadata_path=str(train_set_metadata_path) |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1452 if train_set_metadata_path.exists() |
|
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1453 else None, |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1454 threshold=threshold_value, |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1455 ) |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1456 tab3_content = append_plot_blocks(tab3_content, interactive_plots) |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1457 if interactive_plots: |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1458 logger.info(f"Generated {len(interactive_plots)} interactive Plotly plots") |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1459 except Exception as e: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1460 logger.warning(f"Could not generate Plotly plots: {e}") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1461 |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1462 # Multi-class transparency plots from test stats (replace ROC/PR for multi-class) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1463 if output_type == "category" and test_stats_path.exists(): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1464 try: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1465 multi_curves = build_multiclass_metric_plots(str(test_stats_path)) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1466 tab3_content = append_plot_blocks(tab3_content, multi_curves) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1467 if multi_curves: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1468 logger.info("Added multi-class per-class metric plots to test tab") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1469 except Exception as e: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1470 logger.warning(f"Could not generate multi-class metric plots: {e}") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1471 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1472 # Test diagnostics (confidence histogram) from predictions.csv, using split=2 |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1473 if predictions_csv_path.exists(): |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1474 try: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1475 test_diag_plots = build_prediction_diagnostics( |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1476 str(predictions_csv_path), |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1477 label_data_path=str(config.get("label_column_data_path")) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1478 if config.get("label_column_data_path") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1479 else None, |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1480 split_value=2, |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1481 ) |
|
17
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1482 test_conf_plots = [ |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1483 p for p in test_diag_plots if "Prediction Confidence Distribution" in p.get("title", "") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1484 ] |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1485 if test_conf_plots: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1486 tab3_content = append_plot_blocks(tab3_content, test_conf_plots) |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1487 logger.info("Added test prediction confidence plot") |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1488 except Exception as e: |
|
db9be962dc13
planemo upload for repository https://github.com/goeckslab/gleam.git commit 9db874612b0c3e4f53d639459fe789b762660cd6
goeckslab
parents:
16
diff
changeset
|
1489 logger.warning(f"Could not generate test diagnostics: {e}") |
|
15
d17e3a1b8659
planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents:
12
diff
changeset
|
1490 |
|
12
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1491 # Assemble the three tab contents into the tabbed layout, then append the metrics help modal and the closing HTML |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1492 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1493 modal_html = get_metrics_help_modal() |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1494 html += tabbed_html + modal_html + get_html_closing() |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1495 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1496 try: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1497 with open(report_path, "w") as f: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1498 f.write(html) |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1499 logger.info(f"HTML report generated at: {report_path}") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1500 except Exception as e: |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1501 logger.error(f"Failed to write HTML report: {e}") |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1502 raise |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1503 |
|
bcfa2e234a80
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff
changeset
|
1504 return report_path |
