annotate image_learner_cli.py @ 3:2c3a3dfaf1a9 draft default tip

planemo upload for repository https://github.com/goeckslab/gleam.git commit 5ab02b6688c9af85525dd225134db8f2bbed76ed
author goeckslab
date Fri, 04 Jul 2025 03:44:56 +0000 (13 hours ago)
parents 186424a7eca7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1 import argparse
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2 import json
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
3 import logging
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
4 import os
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
5 import shutil
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
6 import sys
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
7 import tempfile
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
8 import zipfile
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
9 from pathlib import Path
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
10 from typing import Any, Dict, Optional, Protocol, Tuple
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
11
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
12 import pandas as pd
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
13 import pandas.api.types as ptypes
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
14 import yaml
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
15 from constants import (
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
16 IMAGE_PATH_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
17 LABEL_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
18 METRIC_DISPLAY_NAMES,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
19 MODEL_ENCODER_TEMPLATES,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
20 SPLIT_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
21 TEMP_CONFIG_FILENAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
22 TEMP_CSV_FILENAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
23 TEMP_DIR_PREFIX
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
24 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
25 from ludwig.globals import (
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
26 DESCRIPTION_FILE_NAME,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
27 PREDICTIONS_PARQUET_FILE_NAME,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
28 TEST_STATISTICS_FILE_NAME,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
29 TRAIN_SET_METADATA_FILE_NAME,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
30 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
31 from ludwig.utils.data_utils import get_split_path
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
32 from ludwig.visualize import get_visualizations_registry
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
33 from sklearn.model_selection import train_test_split
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
34 from utils import (
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
35 build_tabbed_html,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
36 encode_image_to_base64,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
37 get_html_closing,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
38 get_html_template,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
39 get_metrics_help_modal
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
40 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
41
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
42 # --- Logging Setup ---
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
43 logging.basicConfig(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
44 level=logging.INFO,
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
45 format='%(asctime)s %(levelname)s %(name)s: %(message)s',
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
46 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
47 logger = logging.getLogger("ImageLearner")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
48
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
49
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
50 def format_config_table_html(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
51 config: dict,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
52 split_info: Optional[str] = None,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
53 training_progress: dict = None,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
54 ) -> str:
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
55 display_keys = [
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
56 "task_type",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
57 "model_name",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
58 "epochs",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
59 "batch_size",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
60 "fine_tune",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
61 "use_pretrained",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
62 "learning_rate",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
63 "random_seed",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
64 "early_stop",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
65 ]
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
66
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
67 rows = []
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
68
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
69 for key in display_keys:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
70 val = config.get(key, "N/A")
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
71 if key == "task_type":
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
72 val = val.title() if isinstance(val, str) else val
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
73 if key == "batch_size":
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
74 if val is not None:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
75 val = int(val)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
76 else:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
77 if training_progress:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
78 val = "Auto-selected batch size by Ludwig:<br>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
79 resolved_val = training_progress.get("batch_size")
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
80 val += f"<span style='font-size: 0.85em;'>{resolved_val}</span><br>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
81 else:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
82 val = "auto"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
83 if key == "learning_rate":
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
84 resolved_val = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
85 if val is None or val == "auto":
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
86 if training_progress:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
87 resolved_val = training_progress.get("learning_rate")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
88 val = (
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
89 "Auto-selected learning rate by Ludwig:<br>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
90 f"<span style='font-size: 0.85em;'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
91 f"{resolved_val if resolved_val else val}</span><br>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
92 "<span style='font-size: 0.85em;'>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
93 "Based on model architecture and training setup "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
94 "(e.g., fine-tuning).<br>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
95 "See <a href='https://ludwig.ai/latest/configuration/trainer/"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
96 "#trainer-parameters' target='_blank'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
97 "Ludwig Trainer Parameters</a> for details."
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
98 "</span>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
99 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
100 else:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
101 val = (
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
102 "Auto-selected by Ludwig<br>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
103 "<span style='font-size: 0.85em;'>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
104 "Automatically tuned based on architecture and dataset.<br>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
105 "See <a href='https://ludwig.ai/latest/configuration/trainer/"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
106 "#trainer-parameters' target='_blank'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
107 "Ludwig Trainer Parameters</a> for details."
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
108 "</span>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
109 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
110 else:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
111 val = f"{val:.6f}"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
112 if key == "epochs":
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
113 if (
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
114 training_progress
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
115 and "epoch" in training_progress
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
116 and val > training_progress["epoch"]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
117 ):
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
118 val = (
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
119 f"Because of early stopping: the training "
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
120 f"stopped at epoch {training_progress['epoch']}"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
121 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
122
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
123 if val is None:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
124 continue
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
125 rows.append(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
126 f"<tr>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
127 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left;'>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
128 f"{key.replace('_', ' ').title()}</td>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
129 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center;'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
130 f"{val}</td>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
131 f"</tr>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
132 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
133
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
134 aug_cfg = config.get("augmentation")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
135 if aug_cfg:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
136 types = [str(a.get("type", "")) for a in aug_cfg]
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
137 aug_val = ", ".join(types)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
138 rows.append(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
139 "<tr>"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
140 "<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left;'>Augmentation</td>"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
141 "<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center;'>"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
142 f"{aug_val}</td>"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
143 "</tr>"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
144 )
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
145
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
146 if split_info:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
147 rows.append(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
148 f"<tr>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
149 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left;'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
150 f"Data Split</td>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
151 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center;'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
152 f"{split_info}</td>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
153 f"</tr>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
154 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
155
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
156 return (
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
157 "<h2 style='text-align: center;'>Training Setup</h2>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
158 "<div style='display: flex; justify-content: center;'>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
159 "<table style='border-collapse: collapse; width: 60%; table-layout: auto;'>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
160 "<thead><tr>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
161 "<th style='padding: 10px; border: 1px solid #ccc; text-align: left;'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
162 "Parameter</th>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
163 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center;'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
164 "Value</th>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
165 "</tr></thead><tbody>" + "".join(rows) + "</tbody></table></div><br>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
166 "<p style='text-align: center; font-size: 0.9em;'>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
167 "Model trained using Ludwig.<br>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
168 "If want to learn more about Ludwig default settings,"
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
169 "please check their <a href='https://ludwig.ai' target='_blank'>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
170 "website(ludwig.ai)</a>."
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
171 "</p><hr>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
172 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
173
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
174
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
175 def detect_output_type(test_stats):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
176 """Detects if the output type is 'binary' or 'category' based on test statistics."""
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
177 label_stats = test_stats.get("label", {})
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
178 if "mean_squared_error" in label_stats:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
179 return "regression"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
180 per_class = label_stats.get("per_class_stats", {})
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
181 if len(per_class) == 2:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
182 return "binary"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
183 return "category"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
184
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
185
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
186 def extract_metrics_from_json(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
187 train_stats: dict,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
188 test_stats: dict,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
189 output_type: str,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
190 ) -> dict:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
191 """Extracts relevant metrics from training and test statistics based on the output type."""
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
192 metrics = {"training": {}, "validation": {}, "test": {}}
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
193
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
194 def get_last_value(stats, key):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
195 val = stats.get(key)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
196 if isinstance(val, list) and val:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
197 return val[-1]
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
198 elif isinstance(val, (int, float)):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
199 return val
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
200 return None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
201
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
202 for split in ["training", "validation"]:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
203 split_stats = train_stats.get(split, {})
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
204 if not split_stats:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
205 logging.warning(f"No statistics found for {split} split")
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
206 continue
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
207 label_stats = split_stats.get("label", {})
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
208 if not label_stats:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
209 logging.warning(f"No label statistics found for {split} split")
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
210 continue
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
211 if output_type == "binary":
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
212 metrics[split] = {
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
213 "accuracy": get_last_value(label_stats, "accuracy"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
214 "loss": get_last_value(label_stats, "loss"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
215 "precision": get_last_value(label_stats, "precision"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
216 "recall": get_last_value(label_stats, "recall"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
217 "specificity": get_last_value(label_stats, "specificity"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
218 "roc_auc": get_last_value(label_stats, "roc_auc"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
219 }
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
220 elif output_type == "regression":
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
221 metrics[split] = {
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
222 "loss": get_last_value(label_stats, "loss"),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
223 "mean_absolute_error": get_last_value(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
224 label_stats, "mean_absolute_error"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
225 ),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
226 "mean_absolute_percentage_error": get_last_value(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
227 label_stats, "mean_absolute_percentage_error"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
228 ),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
229 "mean_squared_error": get_last_value(label_stats, "mean_squared_error"),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
230 "root_mean_squared_error": get_last_value(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
231 label_stats, "root_mean_squared_error"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
232 ),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
233 "root_mean_squared_percentage_error": get_last_value(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
234 label_stats, "root_mean_squared_percentage_error"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
235 ),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
236 "r2": get_last_value(label_stats, "r2"),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
237 }
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
238 else:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
239 metrics[split] = {
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
240 "accuracy": get_last_value(label_stats, "accuracy"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
241 "accuracy_micro": get_last_value(label_stats, "accuracy_micro"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
242 "loss": get_last_value(label_stats, "loss"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
243 "roc_auc": get_last_value(label_stats, "roc_auc"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
244 "hits_at_k": get_last_value(label_stats, "hits_at_k"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
245 }
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
246
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
247 # Test metrics: dynamic extraction according to exclusions
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
248 test_label_stats = test_stats.get("label", {})
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
249 if not test_label_stats:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
250 logging.warning("No label statistics found for test split")
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
251 else:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
252 combined_stats = test_stats.get("combined", {})
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
253 overall_stats = test_label_stats.get("overall_stats", {})
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
254
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
255 # Define exclusions
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
256 if output_type == "binary":
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
257 exclude = {"per_class_stats", "precision_recall_curve", "roc_curve"}
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
258 else:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
259 exclude = {"per_class_stats", "confusion_matrix"}
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
260
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
261 # 1. Get all scalar test_label_stats not excluded
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
262 test_metrics = {}
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
263 for k, v in test_label_stats.items():
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
264 if k in exclude:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
265 continue
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
266 if k == "overall_stats":
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
267 continue
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
268 if isinstance(v, (int, float, str, bool)):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
269 test_metrics[k] = v
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
270
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
271 # 2. Add overall_stats (flattened)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
272 for k, v in overall_stats.items():
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
273 test_metrics[k] = v
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
274
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
275 # 3. Optionally include combined/loss if present and not already
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
276 if "loss" in combined_stats and "loss" not in test_metrics:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
277 test_metrics["loss"] = combined_stats["loss"]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
278
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
279 metrics["test"] = test_metrics
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
280
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
281 return metrics
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
282
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
283
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
284 def generate_table_row(cells, styles):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
285 """Helper function to generate an HTML table row."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
286 return (
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
287 "<tr>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
288 + "".join(f"<td style='{styles}'>{cell}</td>" for cell in cells)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
289 + "</tr>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
290 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
291
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
292
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
293 def format_stats_table_html(train_stats: dict, test_stats: dict) -> str:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
294 """Formats a combined HTML table for training, validation, and test metrics."""
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
295 output_type = detect_output_type(test_stats)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
296 all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
297 rows = []
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
298 for metric_key in sorted(all_metrics["training"].keys()):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
299 if (
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
300 metric_key in all_metrics["validation"]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
301 and metric_key in all_metrics["test"]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
302 ):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
303 display_name = METRIC_DISPLAY_NAMES.get(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
304 metric_key,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
305 metric_key.replace("_", " ").title(),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
306 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
307 t = all_metrics["training"].get(metric_key)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
308 v = all_metrics["validation"].get(metric_key)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
309 te = all_metrics["test"].get(metric_key)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
310 if all(x is not None for x in [t, v, te]):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
311 rows.append([display_name, f"{t:.4f}", f"{v:.4f}", f"{te:.4f}"])
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
312
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
313 if not rows:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
314 return "<table><tr><td>No metric values found.</td></tr></table>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
315
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
316 html = (
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
317 "<h2 style='text-align: center;'>Model Performance Summary</h2>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
318 "<div style='display: flex; justify-content: center;'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
319 "<table style='border-collapse: collapse; table-layout: auto;'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
320 "<thead><tr>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
321 "<th style='padding: 10px; border: 1px solid #ccc; text-align: left; "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
322 "white-space: nowrap;'>Metric</th>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
323 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
324 "white-space: nowrap;'>Train</th>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
325 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
326 "white-space: nowrap;'>Validation</th>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
327 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
328 "white-space: nowrap;'>Test</th>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
329 "</tr></thead><tbody>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
330 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
331 for row in rows:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
332 html += generate_table_row(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
333 row,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
334 "padding: 10px; border: 1px solid #ccc; text-align: center; "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
335 "white-space: nowrap;",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
336 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
337 html += "</tbody></table></div><br>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
338 return html
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
339
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
340
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
341 def format_train_val_stats_table_html(train_stats: dict, test_stats: dict) -> str:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
342 """Formats an HTML table for training and validation metrics."""
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
343 output_type = detect_output_type(test_stats)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
344 all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
345 rows = []
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
346 for metric_key in sorted(all_metrics["training"].keys()):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
347 if metric_key in all_metrics["validation"]:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
348 display_name = METRIC_DISPLAY_NAMES.get(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
349 metric_key,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
350 metric_key.replace("_", " ").title(),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
351 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
352 t = all_metrics["training"].get(metric_key)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
353 v = all_metrics["validation"].get(metric_key)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
354 if t is not None and v is not None:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
355 rows.append([display_name, f"{t:.4f}", f"{v:.4f}"])
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
356
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
357 if not rows:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
358 return "<table><tr><td>No metric values found for Train/Validation.</td></tr></table>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
359
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
360 html = (
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
361 "<h2 style='text-align: center;'>Train/Validation Performance Summary</h2>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
362 "<div style='display: flex; justify-content: center;'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
363 "<table style='border-collapse: collapse; table-layout: auto;'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
364 "<thead><tr>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
365 "<th style='padding: 10px; border: 1px solid #ccc; text-align: left; "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
366 "white-space: nowrap;'>Metric</th>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
367 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
368 "white-space: nowrap;'>Train</th>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
369 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
370 "white-space: nowrap;'>Validation</th>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
371 "</tr></thead><tbody>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
372 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
373 for row in rows:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
374 html += generate_table_row(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
375 row,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
376 "padding: 10px; border: 1px solid #ccc; text-align: center; "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
377 "white-space: nowrap;",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
378 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
379 html += "</tbody></table></div><br>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
380 return html
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
381
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
382
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
383 def format_test_merged_stats_table_html(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
384 test_metrics: Dict[str, Optional[float]],
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
385 ) -> str:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
386 """Formats an HTML table for test metrics."""
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
387 rows = []
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
388 for key in sorted(test_metrics.keys()):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
389 display_name = METRIC_DISPLAY_NAMES.get(key, key.replace("_", " ").title())
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
390 value = test_metrics[key]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
391 if value is not None:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
392 rows.append([display_name, f"{value:.4f}"])
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
393
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
394 if not rows:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
395 return "<table><tr><td>No test metric values found.</td></tr></table>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
396
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
397 html = (
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
398 "<h2 style='text-align: center;'>Test Performance Summary</h2>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
399 "<div style='display: flex; justify-content: center;'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
400 "<table style='border-collapse: collapse; table-layout: auto;'>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
401 "<thead><tr>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
402 "<th style='padding: 10px; border: 1px solid #ccc; text-align: left; "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
403 "white-space: nowrap;'>Metric</th>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
404 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
405 "white-space: nowrap;'>Test</th>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
406 "</tr></thead><tbody>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
407 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
408 for row in rows:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
409 html += generate_table_row(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
410 row,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
411 "padding: 10px; border: 1px solid #ccc; text-align: center; "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
412 "white-space: nowrap;",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
413 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
414 html += "</tbody></table></div><br>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
415 return html
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
416
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
417
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
418 def split_data_0_2(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
419 df: pd.DataFrame,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
420 split_column: str,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
421 validation_size: float = 0.15,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
422 random_state: int = 42,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
423 label_column: Optional[str] = None,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
424 ) -> pd.DataFrame:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
425 """Given a DataFrame whose split_column only contains {0,2}, re-assign a portion of the 0s to become 1s (validation)."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
426 out = df.copy()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
427 out[split_column] = pd.to_numeric(out[split_column], errors="coerce").astype(int)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
428
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
429 idx_train = out.index[out[split_column] == 0].tolist()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
430
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
431 if not idx_train:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
432 logger.info("No rows with split=0; nothing to do.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
433 return out
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
434 stratify_arr = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
435 if label_column and label_column in out.columns:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
436 label_counts = out.loc[idx_train, label_column].value_counts()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
437 if label_counts.size > 1 and (label_counts.min() * validation_size) >= 1:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
438 stratify_arr = out.loc[idx_train, label_column]
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
439 else:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
440 logger.warning(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
441 "Cannot stratify (too few labels); splitting without stratify."
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
442 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
443 if validation_size <= 0:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
444 logger.info("validation_size <= 0; keeping all as train.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
445 return out
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
446 if validation_size >= 1:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
447 logger.info("validation_size >= 1; moving all train → validation.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
448 out.loc[idx_train, split_column] = 1
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
449 return out
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
450 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
451 train_idx, val_idx = train_test_split(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
452 idx_train,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
453 test_size=validation_size,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
454 random_state=random_state,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
455 stratify=stratify_arr,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
456 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
457 except ValueError as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
458 logger.warning(f"Stratified split failed ({e}); retrying without stratify.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
459 train_idx, val_idx = train_test_split(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
460 idx_train,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
461 test_size=validation_size,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
462 random_state=random_state,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
463 stratify=None,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
464 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
465 out.loc[train_idx, split_column] = 0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
466 out.loc[val_idx, split_column] = 1
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
467 out[split_column] = out[split_column].astype(int)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
468 return out
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
469
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
470
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
471 class Backend(Protocol):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
472 """Interface for a machine learning backend."""
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
473
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
474 def prepare_config(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
475 self,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
476 config_params: Dict[str, Any],
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
477 split_config: Dict[str, Any],
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
478 ) -> str:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
479 ...
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
480
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
481 def run_experiment(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
482 self,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
483 dataset_path: Path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
484 config_path: Path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
485 output_dir: Path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
486 random_seed: int,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
487 ) -> None:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
488 ...
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
489
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
490 def generate_plots(self, output_dir: Path) -> None:
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
491 ...
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
492
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
493 def generate_html_report(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
494 self,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
495 title: str,
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
496 output_dir: str,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
497 config: Dict[str, Any],
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
498 split_info: str,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
499 ) -> Path:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
500 ...
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
501
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
502
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
503 class LudwigDirectBackend:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
504 """Backend for running Ludwig experiments directly via the internal experiment_cli function."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
505
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
506 def prepare_config(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
507 self,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
508 config_params: Dict[str, Any],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
509 split_config: Dict[str, Any],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
510 ) -> str:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
511 logger.info("LudwigDirectBackend: Preparing YAML configuration.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
512
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
513 model_name = config_params.get("model_name", "resnet18")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
514 use_pretrained = config_params.get("use_pretrained", False)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
515 fine_tune = config_params.get("fine_tune", False)
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
516 if use_pretrained:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
517 trainable = bool(fine_tune)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
518 else:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
519 trainable = True
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
520 epochs = config_params.get("epochs", 10)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
521 batch_size = config_params.get("batch_size")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
522 num_processes = config_params.get("preprocessing_num_processes", 1)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
523 early_stop = config_params.get("early_stop", None)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
524 learning_rate = config_params.get("learning_rate")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
525 learning_rate = "auto" if learning_rate is None else float(learning_rate)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
526 raw_encoder = MODEL_ENCODER_TEMPLATES.get(model_name, model_name)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
527 if isinstance(raw_encoder, dict):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
528 encoder_config = {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
529 **raw_encoder,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
530 "use_pretrained": use_pretrained,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
531 "trainable": trainable,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
532 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
533 else:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
534 encoder_config = {"type": raw_encoder}
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
535
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
536 batch_size_cfg = batch_size or "auto"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
537
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
538 label_column_path = config_params.get("label_column_data_path")
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
539 label_series = None
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
540 if label_column_path is not None and Path(label_column_path).exists():
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
541 try:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
542 label_series = pd.read_csv(label_column_path)[LABEL_COLUMN_NAME]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
543 except Exception as e:
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
544 logger.warning(f"Could not read label column for task detection: {e}")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
545
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
546 if (
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
547 label_series is not None
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
548 and ptypes.is_numeric_dtype(label_series.dtype)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
549 and label_series.nunique() > 10
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
550 ):
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
551 task_type = "regression"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
552 else:
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
553 task_type = "classification"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
554
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
555 config_params["task_type"] = task_type
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
556
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
557 image_feat: Dict[str, Any] = {
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
558 "name": IMAGE_PATH_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
559 "type": "image",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
560 "encoder": encoder_config,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
561 }
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
562 if config_params.get("augmentation") is not None:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
563 image_feat["augmentation"] = config_params["augmentation"]
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
564
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
565 if task_type == "regression":
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
566 output_feat = {
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
567 "name": LABEL_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
568 "type": "number",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
569 "decoder": {"type": "regressor"},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
570 "loss": {"type": "mean_squared_error"},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
571 "evaluation": {
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
572 "metrics": [
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
573 "mean_squared_error",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
574 "mean_absolute_error",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
575 "r2",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
576 ]
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
577 },
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
578 }
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
579 val_metric = config_params.get("validation_metric", "mean_squared_error")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
580
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
581 else:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
582 num_unique_labels = (
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
583 label_series.nunique() if label_series is not None else 2
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
584 )
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
585 output_type = "binary" if num_unique_labels == 2 else "category"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
586 output_feat = {"name": LABEL_COLUMN_NAME, "type": output_type}
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
587 val_metric = None
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
588
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
589 conf: Dict[str, Any] = {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
590 "model_type": "ecd",
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
591 "input_features": [image_feat],
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
592 "output_features": [output_feat],
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
593 "combiner": {"type": "concat"},
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
594 "trainer": {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
595 "epochs": epochs,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
596 "early_stop": early_stop,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
597 "batch_size": batch_size_cfg,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
598 "learning_rate": learning_rate,
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
599 # only set validation_metric for regression
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
600 **({"validation_metric": val_metric} if val_metric else {}),
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
601 },
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
602 "preprocessing": {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
603 "split": split_config,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
604 "num_processes": num_processes,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
605 "in_memory": False,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
606 },
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
607 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
608
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
609 logger.debug("LudwigDirectBackend: Config dict built.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
610 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
611 yaml_str = yaml.dump(conf, sort_keys=False, indent=2)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
612 logger.info("LudwigDirectBackend: YAML config generated.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
613 return yaml_str
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
614 except Exception:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
615 logger.error(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
616 "LudwigDirectBackend: Failed to serialize YAML.",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
617 exc_info=True,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
618 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
619 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
620
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
621 def run_experiment(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
622 self,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
623 dataset_path: Path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
624 config_path: Path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
625 output_dir: Path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
626 random_seed: int = 42,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
627 ) -> None:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
628 """Invoke Ludwig's internal experiment_cli function to run the experiment."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
629 logger.info("LudwigDirectBackend: Starting experiment execution.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
630
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
631 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
632 from ludwig.experiment import experiment_cli
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
633 except ImportError as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
634 logger.error(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
635 "LudwigDirectBackend: Could not import experiment_cli.",
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
636 exc_info=True,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
637 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
638 raise RuntimeError("Ludwig import failed.") from e
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
639
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
640 output_dir.mkdir(parents=True, exist_ok=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
641
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
642 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
643 experiment_cli(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
644 dataset=str(dataset_path),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
645 config=str(config_path),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
646 output_directory=str(output_dir),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
647 random_seed=random_seed,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
648 )
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
649 logger.info(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
650 f"LudwigDirectBackend: Experiment completed. Results in {output_dir}"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
651 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
652 except TypeError as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
653 logger.error(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
654 "LudwigDirectBackend: Argument mismatch in experiment_cli call.",
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
655 exc_info=True,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
656 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
657 raise RuntimeError("Ludwig argument error.") from e
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
658 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
659 logger.error(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
660 "LudwigDirectBackend: Experiment execution error.",
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
661 exc_info=True,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
662 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
663 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
664
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
665 def get_training_process(self, output_dir) -> Optional[Dict[str, Any]]:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
666 """Retrieve the learning rate used in the most recent Ludwig run."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
667 output_dir = Path(output_dir)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
668 exp_dirs = sorted(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
669 output_dir.glob("experiment_run*"),
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
670 key=lambda p: p.stat().st_mtime,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
671 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
672
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
673 if not exp_dirs:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
674 logger.warning(f"No experiment run directories found in {output_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
675 return None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
676
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
677 progress_file = exp_dirs[-1] / "model" / "training_progress.json"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
678 if not progress_file.exists():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
679 logger.warning(f"No training_progress.json found in {progress_file}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
680 return None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
681
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
682 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
683 with progress_file.open("r", encoding="utf-8") as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
684 data = json.load(f)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
685 return {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
686 "learning_rate": data.get("learning_rate"),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
687 "batch_size": data.get("batch_size"),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
688 "epoch": data.get("epoch"),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
689 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
690 except Exception as e:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
691 logger.warning(f"Failed to read training progress info: {e}")
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
692 return {}
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
693
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
694 def convert_parquet_to_csv(self, output_dir: Path):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
695 """Convert the predictions Parquet file to CSV."""
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
696 output_dir = Path(output_dir)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
697 exp_dirs = sorted(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
698 output_dir.glob("experiment_run*"),
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
699 key=lambda p: p.stat().st_mtime,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
700 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
701 if not exp_dirs:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
702 logger.warning(f"No experiment run dirs found in {output_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
703 return
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
704 exp_dir = exp_dirs[-1]
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
705 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
706 csv_path = exp_dir / "predictions.csv"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
707 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
708 df = pd.read_parquet(parquet_path)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
709 df.to_csv(csv_path, index=False)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
710 logger.info(f"Converted Parquet to CSV: {csv_path}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
711 except Exception as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
712 logger.error(f"Error converting Parquet to CSV: {e}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
713
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
714 def generate_plots(self, output_dir: Path) -> None:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
715 """Generate all registered Ludwig visualizations for the latest experiment run."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
716 logger.info("Generating all Ludwig visualizations…")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
717
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
718 test_plots = {
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
719 "compare_performance",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
720 "compare_classifiers_performance_from_prob",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
721 "compare_classifiers_performance_from_pred",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
722 "compare_classifiers_performance_changing_k",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
723 "compare_classifiers_multiclass_multimetric",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
724 "compare_classifiers_predictions",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
725 "confidence_thresholding_2thresholds_2d",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
726 "confidence_thresholding_2thresholds_3d",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
727 "confidence_thresholding",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
728 "confidence_thresholding_data_vs_acc",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
729 "binary_threshold_vs_metric",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
730 "roc_curves",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
731 "roc_curves_from_test_statistics",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
732 "calibration_1_vs_all",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
733 "calibration_multiclass",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
734 "confusion_matrix",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
735 "frequency_vs_f1",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
736 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
737 train_plots = {
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
738 "learning_curves",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
739 "compare_classifiers_performance_subset",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
740 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
741
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
742 output_dir = Path(output_dir)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
743 exp_dirs = sorted(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
744 output_dir.glob("experiment_run*"),
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
745 key=lambda p: p.stat().st_mtime,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
746 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
747 if not exp_dirs:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
748 logger.warning(f"No experiment run dirs found in {output_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
749 return
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
750 exp_dir = exp_dirs[-1]
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
751
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
752 viz_dir = exp_dir / "visualizations"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
753 viz_dir.mkdir(exist_ok=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
754 train_viz = viz_dir / "train"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
755 test_viz = viz_dir / "test"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
756 train_viz.mkdir(parents=True, exist_ok=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
757 test_viz.mkdir(parents=True, exist_ok=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
758
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
759 def _check(p: Path) -> Optional[str]:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
760 return str(p) if p.exists() else None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
761
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
762 training_stats = _check(exp_dir / "training_statistics.json")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
763 test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
764 probs_path = _check(exp_dir / PREDICTIONS_PARQUET_FILE_NAME)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
765 gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
766
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
767 dataset_path = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
768 split_file = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
769 desc = exp_dir / DESCRIPTION_FILE_NAME
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
770 if desc.exists():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
771 with open(desc, "r") as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
772 cfg = json.load(f)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
773 dataset_path = _check(Path(cfg.get("dataset", "")))
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
774 split_file = _check(Path(get_split_path(cfg.get("dataset", ""))))
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
775
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
776 output_feature = ""
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
777 if desc.exists():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
778 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
779 output_feature = cfg["config"]["output_features"][0]["name"]
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
780 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
781 pass
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
782 if not output_feature and test_stats:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
783 with open(test_stats, "r") as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
784 stats = json.load(f)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
785 output_feature = next(iter(stats.keys()), "")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
786
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
787 viz_registry = get_visualizations_registry()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
788 for viz_name, viz_func in viz_registry.items():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
789 if viz_name in train_plots:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
790 viz_dir_plot = train_viz
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
791 elif viz_name in test_plots:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
792 viz_dir_plot = test_viz
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
793 else:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
794 continue
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
795
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
796 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
797 viz_func(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
798 training_statistics=[training_stats] if training_stats else [],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
799 test_statistics=[test_stats] if test_stats else [],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
800 probabilities=[probs_path] if probs_path else [],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
801 output_feature_name=output_feature,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
802 ground_truth_split=2,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
803 top_n_classes=[0],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
804 top_k=3,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
805 ground_truth_metadata=gt_metadata,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
806 ground_truth=dataset_path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
807 split_file=split_file,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
808 output_directory=str(viz_dir_plot),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
809 normalize=False,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
810 file_format="png",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
811 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
812 logger.info(f"✔ Generated {viz_name}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
813 except Exception as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
814 logger.warning(f"✘ Skipped {viz_name}: {e}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
815
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
816 logger.info(f"All visualizations written to {viz_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
817
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
818 def generate_html_report(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
819 self,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
820 title: str,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
821 output_dir: str,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
822 config: dict,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
823 split_info: str,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
824 ) -> Path:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
825 """Assemble an HTML report from visualizations under train_val/ and test/ folders."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
826 cwd = Path.cwd()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
827 report_name = title.lower().replace(" ", "_") + "_report.html"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
828 report_path = cwd / report_name
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
829 output_dir = Path(output_dir)
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
830 output_type = None
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
831
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
832 exp_dirs = sorted(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
833 output_dir.glob("experiment_run*"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
834 key=lambda p: p.stat().st_mtime,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
835 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
836 if not exp_dirs:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
837 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
838 exp_dir = exp_dirs[-1]
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
839
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
840 base_viz_dir = exp_dir / "visualizations"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
841 train_viz_dir = base_viz_dir / "train"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
842 test_viz_dir = base_viz_dir / "test"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
843
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
844 html = get_html_template()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
845 html += f"<h1>{title}</h1>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
846
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
847 metrics_html = ""
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
848 train_val_metrics_html = ""
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
849 test_metrics_html = ""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
850 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
851 train_stats_path = exp_dir / "training_statistics.json"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
852 test_stats_path = exp_dir / TEST_STATISTICS_FILE_NAME
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
853 if train_stats_path.exists() and test_stats_path.exists():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
854 with open(train_stats_path) as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
855 train_stats = json.load(f)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
856 with open(test_stats_path) as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
857 test_stats = json.load(f)
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
858 output_type = detect_output_type(test_stats)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
859 metrics_html = format_stats_table_html(train_stats, test_stats)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
860 train_val_metrics_html = format_train_val_stats_table_html(
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
861 train_stats, test_stats
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
862 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
863 test_metrics_html = format_test_merged_stats_table_html(
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
864 extract_metrics_from_json(train_stats, test_stats, output_type)[
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
865 "test"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
866 ]
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
867 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
868 except Exception as e:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
869 logger.warning(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
870 f"Could not load stats for HTML report: {type(e).__name__}: {e}"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
871 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
872
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
873 config_html = ""
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
874 training_progress = self.get_training_process(output_dir)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
875 try:
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
876 config_html = format_config_table_html(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
877 config, split_info, training_progress
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
878 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
879 except Exception as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
880 logger.warning(f"Could not load config for HTML report: {e}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
881
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
882 def render_img_section(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
883 title: str, dir_path: Path, output_type: str = None
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
884 ) -> str:
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
885 if not dir_path.exists():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
886 return f"<h2>{title}</h2><p><em>Directory not found.</em></p>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
887
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
888 imgs = list(dir_path.glob("*.png"))
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
889 if not imgs:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
890 return f"<h2>{title}</h2><p><em>No plots found.</em></p>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
891
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
892 if title == "Test Visualizations" and output_type == "binary":
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
893 order = [
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
894 "confusion_matrix__label_top2.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
895 "roc_curves_from_prediction_statistics.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
896 "compare_performance_label.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
897 "confusion_matrix_entropy__label_top2.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
898 ]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
899 img_names = {img.name: img for img in imgs}
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
900 ordered_imgs = [
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
901 img_names[fname] for fname in order if fname in img_names
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
902 ]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
903 remaining = sorted(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
904 [
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
905 img
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
906 for img in imgs
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
907 if img.name not in order and img.name != "roc_curves.png"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
908 ]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
909 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
910 imgs = ordered_imgs + remaining
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
911
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
912 elif title == "Test Visualizations" and output_type == "category":
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
913 unwanted = {
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
914 "compare_classifiers_multiclass_multimetric__label_best10.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
915 "compare_classifiers_multiclass_multimetric__label_top10.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
916 "compare_classifiers_multiclass_multimetric__label_worst10.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
917 }
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
918 display_order = [
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
919 "confusion_matrix__label_top10.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
920 "roc_curves.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
921 "compare_performance_label.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
922 "compare_classifiers_performance_from_prob.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
923 "compare_classifiers_multiclass_multimetric__label_sorted.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
924 "confusion_matrix_entropy__label_top10.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
925 ]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
926 img_names = {img.name: img for img in imgs if img.name not in unwanted}
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
927 ordered_imgs = [
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
928 img_names[fname] for fname in display_order if fname in img_names
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
929 ]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
930 remaining = sorted(
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
931 [img for img in img_names.values() if img.name not in display_order]
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
932 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
933 imgs = ordered_imgs + remaining
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
934
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
935 else:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
936 if output_type == "category":
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
937 unwanted = {
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
938 "compare_classifiers_multiclass_multimetric__label_best10.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
939 "compare_classifiers_multiclass_multimetric__label_top10.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
940 "compare_classifiers_multiclass_multimetric__label_worst10.png",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
941 }
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
942 imgs = sorted([img for img in imgs if img.name not in unwanted])
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
943 else:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
944 imgs = sorted(imgs)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
945
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
946 section_html = f"<h2 style='text-align: center;'>{title}</h2><div>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
947 for img in imgs:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
948 b64 = encode_image_to_base64(str(img))
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
949 section_html += (
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
950 f'<div class="plot" style="margin-bottom:20px;text-align:center;">'
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
951 f"<h3>{img.stem.replace('_', ' ').title()}</h3>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
952 f'<img src="data:image/png;base64,{b64}" '
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
953 f'style="max-width:90%;max-height:600px;border:1px solid #ddd;" />'
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
954 f"</div>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
955 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
956 section_html += "</div>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
957 return section_html
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
958
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
959 tab1_content = config_html + metrics_html
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
960
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
961 tab2_content = train_val_metrics_html + render_img_section(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
962 "Training & Validation Visualizations", train_viz_dir
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
963 )
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
964
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
965 # --- Predictions vs Ground Truth table ---
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
966 preds_section = ""
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
967 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
968 if parquet_path.exists():
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
969 try:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
970 # 1) load predictions from Parquet
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
971 df_preds = pd.read_parquet(parquet_path).reset_index(drop=True)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
972 # assume the column containing your model's prediction is named "prediction"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
973 # or contains that substring:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
974 pred_col = next(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
975 (c for c in df_preds.columns if "prediction" in c.lower()),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
976 None,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
977 )
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
978 if pred_col is None:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
979 raise ValueError("No prediction column found in Parquet output")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
980 df_pred = df_preds[[pred_col]].rename(columns={pred_col: "prediction"})
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
981
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
982 # 2) load ground truth for the test split from prepared CSV
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
983 df_all = pd.read_csv(config["label_column_data_path"])
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
984 df_gt = df_all[df_all[SPLIT_COLUMN_NAME] == 2][
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
985 LABEL_COLUMN_NAME
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
986 ].reset_index(drop=True)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
987
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
988 # 3) concatenate side‐by‐side
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
989 df_table = pd.concat([df_gt, df_pred], axis=1)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
990 df_table.columns = [LABEL_COLUMN_NAME, "prediction"]
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
991
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
992 # 4) render as HTML
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
993 preds_html = df_table.to_html(index=False, classes="predictions-table")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
994 preds_section = (
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
995 "<h2 style='text-align: center;'>Predictions vs. Ground Truth</h2>"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
996 "<div style='overflow-x:auto; margin-bottom:20px;'>"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
997 + preds_html
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
998 + "</div>"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
999 )
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1000 except Exception as e:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1001 logger.warning(f"Could not build Predictions vs GT table: {e}")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1002 # Test tab = Metrics + Preds table + Visualizations
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1003
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1004 tab3_content = (
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1005 test_metrics_html
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1006 + preds_section
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1007 + render_img_section("Test Visualizations", test_viz_dir, output_type)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1008 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1009
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1010 # assemble the tabs and help modal
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1011 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1012 modal_html = get_metrics_help_modal()
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1013 html += tabbed_html + modal_html + get_html_closing()
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1014
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1015 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1016 with open(report_path, "w") as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1017 f.write(html)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1018 logger.info(f"HTML report generated at: {report_path}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1019 except Exception as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1020 logger.error(f"Failed to write HTML report: {e}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1021 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1022
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1023 return report_path
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1024
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1025
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1026 class WorkflowOrchestrator:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1027 """Manages the image-classification workflow."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1028
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1029 def __init__(self, args: argparse.Namespace, backend: Backend):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1030 self.args = args
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1031 self.backend = backend
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1032 self.temp_dir: Optional[Path] = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1033 self.image_extract_dir: Optional[Path] = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1034 logger.info(f"Orchestrator initialized with backend: {type(backend).__name__}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1035
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1036 def _create_temp_dirs(self) -> None:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1037 """Create temporary output and image extraction directories."""
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1038 try:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1039 self.temp_dir = Path(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1040 tempfile.mkdtemp(dir=self.args.output_dir, prefix=TEMP_DIR_PREFIX)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1041 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1042 self.image_extract_dir = self.temp_dir / "images"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1043 self.image_extract_dir.mkdir()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1044 logger.info(f"Created temp directory: {self.temp_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1045 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1046 logger.error("Failed to create temporary directories", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1047 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1048
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1049 def _extract_images(self) -> None:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1050 """Extract images from ZIP into the temp image directory."""
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1051 if self.image_extract_dir is None:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1052 raise RuntimeError("Temp image directory not initialized.")
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1053 logger.info(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1054 f"Extracting images from {self.args.image_zip} → {self.image_extract_dir}"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1055 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1056 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1057 with zipfile.ZipFile(self.args.image_zip, "r") as z:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1058 z.extractall(self.image_extract_dir)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1059 logger.info("Image extraction complete.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1060 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1061 logger.error("Error extracting zip file", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1062 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1063
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1064 def _prepare_data(self) -> Tuple[Path, Dict[str, Any], str]:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1065 """Load CSV, update image paths, handle splits, and write prepared CSV."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1066 if not self.temp_dir or not self.image_extract_dir:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1067 raise RuntimeError("Temp dirs not initialized before data prep.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1068
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1069 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1070 df = pd.read_csv(self.args.csv_file)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1071 logger.info(f"Loaded CSV: {self.args.csv_file}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1072 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1073 logger.error("Error loading CSV file", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1074 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1075
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1076 required = {IMAGE_PATH_COLUMN_NAME, LABEL_COLUMN_NAME}
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1077 missing = required - set(df.columns)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1078 if missing:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1079 raise ValueError(f"Missing CSV columns: {', '.join(missing)}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1080
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1081 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1082 df[IMAGE_PATH_COLUMN_NAME] = df[IMAGE_PATH_COLUMN_NAME].apply(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1083 lambda p: str((self.image_extract_dir / p).resolve())
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1084 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1085 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1086 logger.error("Error updating image paths", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1087 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1088
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1089 if SPLIT_COLUMN_NAME in df.columns:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1090 df, split_config, split_info = self._process_fixed_split(df)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1091 else:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1092 logger.info("No split column; using random split")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1093 split_config = {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1094 "type": "random",
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1095 "probabilities": self.args.split_probabilities,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1096 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1097 split_info = (
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1098 f"No split column in CSV. Used random split: "
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1099 f"{[int(p * 100) for p in self.args.split_probabilities]}% "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1100 f"for train/val/test."
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1101 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1102
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1103 final_csv = self.temp_dir / TEMP_CSV_FILENAME
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1104 try:
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1105
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1106 df.to_csv(final_csv, index=False)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1107 logger.info(f"Saved prepared data to {final_csv}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1108 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1109 logger.error("Error saving prepared CSV", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1110 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1111
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1112 return final_csv, split_config, split_info
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1113
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1114 def _process_fixed_split(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1115 self, df: pd.DataFrame
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1116 ) -> Tuple[pd.DataFrame, Dict[str, Any], str]:
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1117 """Process a fixed split column (0=train,1=val,2=test)."""
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1118 logger.info(f"Fixed split column '{SPLIT_COLUMN_NAME}' detected.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1119 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1120 col = df[SPLIT_COLUMN_NAME]
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1121 df[SPLIT_COLUMN_NAME] = pd.to_numeric(col, errors="coerce").astype(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1122 pd.Int64Dtype()
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1123 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1124 if df[SPLIT_COLUMN_NAME].isna().any():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1125 logger.warning("Split column contains non-numeric/missing values.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1126
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1127 unique = set(df[SPLIT_COLUMN_NAME].dropna().unique())
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1128 logger.info(f"Unique split values: {unique}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1129
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1130 if unique == {0, 2}:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1131 df = split_data_0_2(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1132 df,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1133 SPLIT_COLUMN_NAME,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1134 validation_size=self.args.validation_size,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1135 label_column=LABEL_COLUMN_NAME,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1136 random_state=self.args.random_seed,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1137 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1138 split_info = (
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1139 "Detected a split column (with values 0 and 2) in the input CSV. "
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1140 f"Used this column as a base and reassigned "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1141 f"{self.args.validation_size * 100:.1f}% "
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1142 "of the training set (originally labeled 0) to validation (labeled 1)."
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1143 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1144 logger.info("Applied custom 0/2 split.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1145 elif unique.issubset({0, 1, 2}):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1146 split_info = "Used user-defined split column from CSV."
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1147 logger.info("Using fixed split as-is.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1148 else:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1149 raise ValueError(f"Unexpected split values: {unique}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1150
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1151 return df, {"type": "fixed", "column": SPLIT_COLUMN_NAME}, split_info
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1152
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1153 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1154 logger.error("Error processing fixed split", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1155 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1156
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1157 def _cleanup_temp_dirs(self) -> None:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1158 if self.temp_dir and self.temp_dir.exists():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1159 logger.info(f"Cleaning up temp directory: {self.temp_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1160 shutil.rmtree(self.temp_dir, ignore_errors=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1161 self.temp_dir = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1162 self.image_extract_dir = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1163
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1164 def run(self) -> None:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1165 """Execute the full workflow end-to-end."""
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1166 logger.info("Starting workflow...")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1167 self.args.output_dir.mkdir(parents=True, exist_ok=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1168
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1169 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1170 self._create_temp_dirs()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1171 self._extract_images()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1172 csv_path, split_cfg, split_info = self._prepare_data()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1173
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1174 use_pretrained = self.args.use_pretrained or self.args.fine_tune
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1175
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1176 backend_args = {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1177 "model_name": self.args.model_name,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1178 "fine_tune": self.args.fine_tune,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1179 "use_pretrained": use_pretrained,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1180 "epochs": self.args.epochs,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1181 "batch_size": self.args.batch_size,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1182 "preprocessing_num_processes": self.args.preprocessing_num_processes,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1183 "split_probabilities": self.args.split_probabilities,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1184 "learning_rate": self.args.learning_rate,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1185 "random_seed": self.args.random_seed,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1186 "early_stop": self.args.early_stop,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1187 "label_column_data_path": csv_path,
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1188 "augmentation": self.args.augmentation,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1189 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1190 yaml_str = self.backend.prepare_config(backend_args, split_cfg)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1191
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1192 config_file = self.temp_dir / TEMP_CONFIG_FILENAME
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1193 config_file.write_text(yaml_str)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1194 logger.info(f"Wrote backend config: {config_file}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1195
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1196 self.backend.run_experiment(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1197 csv_path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1198 config_file,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1199 self.args.output_dir,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1200 self.args.random_seed,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1201 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1202 logger.info("Workflow completed successfully.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1203 self.backend.generate_plots(self.args.output_dir)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1204 report_file = self.backend.generate_html_report(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1205 "Image Classification Results",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1206 self.args.output_dir,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1207 backend_args,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1208 split_info,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1209 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1210 logger.info(f"HTML report generated at: {report_file}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1211 self.backend.convert_parquet_to_csv(self.args.output_dir)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1212 logger.info("Converted Parquet to CSV.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1213 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1214 logger.error("Workflow execution failed", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1215 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1216 finally:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1217 self._cleanup_temp_dirs()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1218
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1219
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1220 def parse_learning_rate(s):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1221 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1222 return float(s)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1223 except (TypeError, ValueError):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1224 return None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1225
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1226
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1227 def aug_parse(aug_string: str):
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1228 """
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1229 Parse comma-separated augmentation keys into Ludwig augmentation dicts.
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1230 Raises ValueError on unknown key.
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1231 """
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1232 mapping = {
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1233 "random_horizontal_flip": {"type": "random_horizontal_flip"},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1234 "random_vertical_flip": {"type": "random_vertical_flip"},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1235 "random_rotate": {"type": "random_rotate", "degree": 10},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1236 "random_blur": {"type": "random_blur", "kernel_size": 3},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1237 "random_brightness": {"type": "random_brightness", "min": 0.5, "max": 2.0},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1238 "random_contrast": {"type": "random_contrast", "min": 0.5, "max": 2.0},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1239 }
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1240 aug_list = []
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1241 for tok in aug_string.split(","):
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1242 key = tok.strip()
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1243 if key not in mapping:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1244 valid = ", ".join(mapping.keys())
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1245 raise ValueError(f"Unknown augmentation '{key}'. Valid choices: {valid}")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1246 aug_list.append(mapping[key])
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1247 return aug_list
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1248
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1249
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1250 class SplitProbAction(argparse.Action):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1251 def __call__(self, parser, namespace, values, option_string=None):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1252 train, val, test = values
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1253 total = train + val + test
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1254 if abs(total - 1.0) > 1e-6:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1255 parser.error(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1256 f"--split-probabilities must sum to 1.0; "
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1257 f"got {train:.3f} + {val:.3f} + {test:.3f} = {total:.3f}"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1258 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1259 setattr(namespace, self.dest, values)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1260
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1261
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1262 def main():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1263 parser = argparse.ArgumentParser(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1264 description="Image Classification Learner with Pluggable Backends",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1265 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1266 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1267 "--csv-file",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1268 required=True,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1269 type=Path,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1270 help="Path to the input CSV",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1271 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1272 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1273 "--image-zip",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1274 required=True,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1275 type=Path,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1276 help="Path to the images ZIP",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1277 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1278 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1279 "--model-name",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1280 required=True,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1281 choices=MODEL_ENCODER_TEMPLATES.keys(),
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1282 help="Which model template to use",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1283 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1284 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1285 "--use-pretrained",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1286 action="store_true",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1287 help="Use pretrained weights for the model",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1288 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1289 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1290 "--fine-tune",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1291 action="store_true",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1292 help="Enable fine-tuning",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1293 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1294 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1295 "--epochs",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1296 type=int,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1297 default=10,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1298 help="Number of training epochs",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1299 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1300 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1301 "--early-stop",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1302 type=int,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1303 default=5,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1304 help="Early stopping patience",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1305 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1306 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1307 "--batch-size",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1308 type=int,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1309 help="Batch size (None = auto)",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1310 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1311 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1312 "--output-dir",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1313 type=Path,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1314 default=Path("learner_output"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1315 help="Where to write outputs",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1316 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1317 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1318 "--validation-size",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1319 type=float,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1320 default=0.15,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1321 help="Fraction for validation (0.0–1.0)",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1322 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1323 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1324 "--preprocessing-num-processes",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1325 type=int,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1326 default=max(1, os.cpu_count() // 2),
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1327 help="CPU processes for data prep",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1328 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1329 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1330 "--split-probabilities",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1331 type=float,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1332 nargs=3,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1333 metavar=("train", "val", "test"),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1334 action=SplitProbAction,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1335 default=[0.7, 0.1, 0.2],
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1336 help=(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1337 "Random split proportions (e.g., 0.7 0.1 0.2)."
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1338 "Only used if no split column."
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1339 ),
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1340 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1341 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1342 "--random-seed",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1343 type=int,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1344 default=42,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1345 help="Random seed used for dataset splitting (default: 42)",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1346 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1347 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1348 "--learning-rate",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1349 type=parse_learning_rate,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1350 default=None,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1351 help="Learning rate. If not provided, Ludwig will auto-select it.",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1352 )
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1353 parser.add_argument(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1354 "--augmentation",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1355 type=str,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1356 default=None,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1357 help=(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1358 "Comma-separated list (in order) of any of: "
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1359 "random_horizontal_flip, random_vertical_flip, random_rotate, "
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1360 "random_blur, random_brightness, random_contrast. "
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1361 "E.g. --augmentation random_horizontal_flip,random_rotate"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1362 ),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1363 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1364
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1365 args = parser.parse_args()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1366
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1367 if not 0.0 <= args.validation_size <= 1.0:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1368 parser.error("validation-size must be between 0.0 and 1.0")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1369 if not args.csv_file.is_file():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1370 parser.error(f"CSV not found: {args.csv_file}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1371 if not args.image_zip.is_file():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1372 parser.error(f"ZIP not found: {args.image_zip}")
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1373 if args.augmentation is not None:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1374 try:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1375 augmentation_setup = aug_parse(args.augmentation)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1376 setattr(args, "augmentation", augmentation_setup)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1377 except ValueError as e:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1378 parser.error(str(e))
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1379
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1380 backend_instance = LudwigDirectBackend()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1381 orchestrator = WorkflowOrchestrator(args, backend_instance)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1382
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1383 exit_code = 0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1384 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1385 orchestrator.run()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1386 logger.info("Main script finished successfully.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1387 except Exception as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1388 logger.error(f"Main script failed.{e}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1389 exit_code = 1
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1390 finally:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1391 sys.exit(exit_code)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1392
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1393
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1394 if __name__ == "__main__":
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1395 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1396 import ludwig
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1397
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1398 logger.debug(f"Found Ludwig version: {ludwig.globals.LUDWIG_VERSION}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1399 except ImportError:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1400 logger.error(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1401 "Ludwig library not found. Please ensure Ludwig is installed "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1402 "('pip install ludwig[image]')"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1403 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1404 sys.exit(1)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1405
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1406 main()