annotate image_learner_cli.py @ 11:c5150cceab47 draft default tip

planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
author goeckslab
date Sat, 18 Oct 2025 03:17:09 +0000
parents b0d893d04d4c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1 import argparse
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2 import json
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
3 import logging
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
4 import os
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
5 import shutil
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
6 import sys
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
7 import tempfile
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
8 import zipfile
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
9 from pathlib import Path
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
10 from typing import Any, Dict, Optional, Protocol, Tuple
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
11
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
12 import matplotlib
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
13 import numpy as np
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
14 import pandas as pd
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
15 import pandas.api.types as ptypes
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
16 import yaml
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
17 from constants import (
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
18 IMAGE_PATH_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
19 LABEL_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
20 METRIC_DISPLAY_NAMES,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
21 MODEL_ENCODER_TEMPLATES,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
22 SPLIT_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
23 TEMP_CONFIG_FILENAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
24 TEMP_CSV_FILENAME,
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
25 TEMP_DIR_PREFIX,
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
26 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
27 from ludwig.globals import (
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
28 DESCRIPTION_FILE_NAME,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
29 PREDICTIONS_PARQUET_FILE_NAME,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
30 TEST_STATISTICS_FILE_NAME,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
31 TRAIN_SET_METADATA_FILE_NAME,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
32 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
33 from ludwig.utils.data_utils import get_split_path
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
34 from plotly_plots import build_classification_plots
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
35 from sklearn.model_selection import train_test_split
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
36 from utils import (
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
37 build_tabbed_html,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
38 encode_image_to_base64,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
39 get_html_closing,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
40 get_html_template,
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
41 get_metrics_help_modal,
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
42 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
43
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
44 # Set matplotlib backend after imports
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
45 matplotlib.use('Agg')
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
46
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
47 # --- Logging Setup ---
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
48 logging.basicConfig(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
49 level=logging.INFO,
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
50 format="%(asctime)s %(levelname)s %(name)s: %(message)s",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
51 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
52 logger = logging.getLogger("ImageLearner")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
53
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
54 # Optional MetaFormer configuration registry
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
55 META_DEFAULT_CFGS: Dict[str, Any] = {}
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
56 try:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
57 from MetaFormer import default_cfgs as META_DEFAULT_CFGS # type: ignore[attr-defined]
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
58 except Exception as e:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
59 logger.debug("MetaFormer default configs unavailable: %s", e)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
60 META_DEFAULT_CFGS = {}
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
61
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
62 # Try to import Ludwig visualization registry (may fail due to optional dependencies)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
63 # This must come AFTER logger is defined
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
64 _ludwig_viz_available = False
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
65 get_visualizations_registry = None
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
66 try:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
67 from ludwig.visualize import get_visualizations_registry
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
68 _ludwig_viz_available = True
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
69 logger.info("Ludwig visualizations available")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
70 except ImportError as e:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
71 logger.warning(f"Ludwig visualizations not available: {e}. Will use fallback plots only.")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
72 except Exception as e:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
73 logger.warning(f"Ludwig visualizations not available due to dependency issues: {e}. Will use fallback plots only.")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
74
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
75 # --- MetaFormer patching integration ---
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
76 _metaformer_patch_ok = False
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
77 try:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
78 from MetaFormer.metaformer_stacked_cnn import patch_ludwig_stacked_cnn as _mf_patch
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
79 if _mf_patch():
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
80 _metaformer_patch_ok = True
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
81 logger.info("MetaFormer patching applied for Ludwig stacked_cnn encoder.")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
82 except Exception as e:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
83 logger.warning(f"MetaFormer stacked CNN not available: {e}")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
84 _metaformer_patch_ok = False
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
85
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
86 # Note: CAFormer models are now handled through MetaFormer framework
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
87
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
88
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
89 def format_config_table_html(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
90 config: dict,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
91 split_info: Optional[str] = None,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
92 training_progress: dict = None,
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
93 output_type: Optional[str] = None,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
94 ) -> str:
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
95 display_keys = [
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
96 "task_type",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
97 "model_name",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
98 "epochs",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
99 "batch_size",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
100 "fine_tune",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
101 "use_pretrained",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
102 "learning_rate",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
103 "random_seed",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
104 "early_stop",
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
105 "threshold",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
106 ]
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
107
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
108 rows = []
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
109
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
110 for key in display_keys:
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
111 val = config.get(key, None)
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
112 if key == "threshold":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
113 if output_type != "binary":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
114 continue
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
115 val = val if val is not None else 0.5
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
116 val_str = f"{val:.2f}"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
117 if val == 0.5:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
118 val_str += " (default)"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
119 else:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
120 if key == "task_type":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
121 val_str = val.title() if isinstance(val, str) else "N/A"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
122 elif key == "batch_size":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
123 if val is not None:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
124 val_str = int(val)
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
125 else:
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
126 val = "auto"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
127 val_str = "auto"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
128 resolved_val = None
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
129 if val is None or val == "auto":
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
130 if training_progress:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
131 resolved_val = training_progress.get("batch_size")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
132 val = (
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
133 "Auto-selected batch size by Ludwig:<br>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
134 f"<span style='font-size: 0.85em;'>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
135 f"{resolved_val if resolved_val else val}</span><br>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
136 "<span style='font-size: 0.85em;'>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
137 "Based on model architecture and training setup "
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
138 "(e.g., fine-tuning).<br>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
139 "See <a href='https://ludwig.ai/latest/configuration/trainer/"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
140 "#trainer-parameters' target='_blank'>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
141 "Ludwig Trainer Parameters</a> for details."
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
142 "</span>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
143 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
144 else:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
145 val = (
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
146 "Auto-selected by Ludwig<br>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
147 "<span style='font-size: 0.85em;'>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
148 "Automatically tuned based on architecture and dataset.<br>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
149 "See <a href='https://ludwig.ai/latest/configuration/trainer/"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
150 "#trainer-parameters' target='_blank'>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
151 "Ludwig Trainer Parameters</a> for details."
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
152 "</span>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
153 )
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
154 elif key == "learning_rate":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
155 if val is not None and val != "auto":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
156 val_str = f"{val:.6f}"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
157 else:
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
158 if training_progress:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
159 resolved_val = training_progress.get("learning_rate")
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
160 val_str = (
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
161 "Auto-selected learning rate by Ludwig:<br>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
162 f"<span style='font-size: 0.85em;'>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
163 f"{resolved_val if resolved_val else 'auto'}</span><br>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
164 "<span style='font-size: 0.85em;'>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
165 "Based on model architecture and training setup "
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
166 "(e.g., fine-tuning).<br>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
167 "</span>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
168 )
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
169 else:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
170 val_str = (
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
171 "Auto-selected by Ludwig<br>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
172 "<span style='font-size: 0.85em;'>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
173 "Automatically tuned based on architecture and dataset.<br>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
174 "See <a href='https://ludwig.ai/latest/configuration/trainer/"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
175 "#trainer-parameters' target='_blank'>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
176 "Ludwig Trainer Parameters</a> for details."
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
177 "</span>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
178 )
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
179 elif key == "epochs":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
180 if val is None:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
181 val_str = "N/A"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
182 else:
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
183 if (
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
184 training_progress
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
185 and "epoch" in training_progress
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
186 and val > training_progress["epoch"]
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
187 ):
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
188 val_str = (
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
189 f"Because of early stopping: the training "
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
190 f"stopped at epoch {training_progress['epoch']}"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
191 )
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
192 else:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
193 val_str = val
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
194 else:
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
195 val_str = val if val is not None else "N/A"
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
196 if val_str == "N/A" and key not in ["task_type"]:
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
197 continue
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
198 rows.append(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
199 f"<tr>"
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
200 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; "
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
201 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
202 f"{key.replace('_', ' ').title()}</td>"
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
203 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; "
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
204 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>"
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
205 f"{val_str}</td>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
206 f"</tr>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
207 )
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
208
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
209 aug_cfg = config.get("augmentation")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
210 if aug_cfg:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
211 types = [str(a.get("type", "")) for a in aug_cfg]
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
212 aug_val = ", ".join(types)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
213 rows.append(
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
214 f"<tr><td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; "
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
215 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>Augmentation</td>"
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
216 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; "
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
217 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>{aug_val}</td></tr>"
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
218 )
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
219
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
220 if split_info:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
221 rows.append(
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
222 f"<tr><td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; "
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
223 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>Data Split</td>"
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
224 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; "
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
225 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>{split_info}</td></tr>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
226 )
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
227
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
228 html = f"""
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
229 <h2 style="text-align: center;">Model and Training Summary</h2>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
230 <div style="display: flex; justify-content: center;">
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
231 <table style="border-collapse: collapse; width: 100%; table-layout: fixed;">
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
232 <thead><tr>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
233 <th style="padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Parameter</th>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
234 <th style="padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Value</th>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
235 </tr></thead>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
236 <tbody>
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
237 {"".join(rows)}
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
238 </tbody>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
239 </table>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
240 </div><br>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
241 <p style="text-align: center; font-size: 0.9em;">
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
242 Model trained using <a href="https://ludwig.ai/" target="_blank" rel="noopener noreferrer">Ludwig</a>.
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
243 <a href="https://ludwig.ai/latest/configuration/" target="_blank" rel="noopener noreferrer">
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
244 Ludwig documentation provides detailed information about default model and training parameters
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
245 </a>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
246 </p><hr>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
247 """
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
248 return html
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
249
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
250
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
251 def detect_output_type(test_stats):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
252 """Detects if the output type is 'binary' or 'category' based on test statistics."""
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
253 label_stats = test_stats.get("label", {})
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
254 if "mean_squared_error" in label_stats:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
255 return "regression"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
256 per_class = label_stats.get("per_class_stats", {})
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
257 if len(per_class) == 2:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
258 return "binary"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
259 return "category"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
260
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
261
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
262 def extract_metrics_from_json(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
263 train_stats: dict,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
264 test_stats: dict,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
265 output_type: str,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
266 ) -> dict:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
267 """Extracts relevant metrics from training and test statistics based on the output type."""
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
268 metrics = {"training": {}, "validation": {}, "test": {}}
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
269
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
270 def get_last_value(stats, key):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
271 val = stats.get(key)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
272 if isinstance(val, list) and val:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
273 return val[-1]
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
274 elif isinstance(val, (int, float)):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
275 return val
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
276 return None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
277
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
278 for split in ["training", "validation"]:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
279 split_stats = train_stats.get(split, {})
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
280 if not split_stats:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
281 logging.warning(f"No statistics found for {split} split")
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
282 continue
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
283 label_stats = split_stats.get("label", {})
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
284 if not label_stats:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
285 logging.warning(f"No label statistics found for {split} split")
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
286 continue
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
287 if output_type == "binary":
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
288 metrics[split] = {
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
289 "accuracy": get_last_value(label_stats, "accuracy"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
290 "loss": get_last_value(label_stats, "loss"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
291 "precision": get_last_value(label_stats, "precision"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
292 "recall": get_last_value(label_stats, "recall"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
293 "specificity": get_last_value(label_stats, "specificity"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
294 "roc_auc": get_last_value(label_stats, "roc_auc"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
295 }
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
296 elif output_type == "regression":
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
297 metrics[split] = {
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
298 "loss": get_last_value(label_stats, "loss"),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
299 "mean_absolute_error": get_last_value(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
300 label_stats, "mean_absolute_error"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
301 ),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
302 "mean_absolute_percentage_error": get_last_value(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
303 label_stats, "mean_absolute_percentage_error"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
304 ),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
305 "mean_squared_error": get_last_value(label_stats, "mean_squared_error"),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
306 "root_mean_squared_error": get_last_value(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
307 label_stats, "root_mean_squared_error"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
308 ),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
309 "root_mean_squared_percentage_error": get_last_value(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
310 label_stats, "root_mean_squared_percentage_error"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
311 ),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
312 "r2": get_last_value(label_stats, "r2"),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
313 }
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
314 else:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
315 metrics[split] = {
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
316 "accuracy": get_last_value(label_stats, "accuracy"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
317 "accuracy_micro": get_last_value(label_stats, "accuracy_micro"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
318 "loss": get_last_value(label_stats, "loss"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
319 "roc_auc": get_last_value(label_stats, "roc_auc"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
320 "hits_at_k": get_last_value(label_stats, "hits_at_k"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
321 }
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
322
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
323 # Test metrics: dynamic extraction according to exclusions
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
324 test_label_stats = test_stats.get("label", {})
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
325 if not test_label_stats:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
326 logging.warning("No label statistics found for test split")
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
327 else:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
328 combined_stats = test_stats.get("combined", {})
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
329 overall_stats = test_label_stats.get("overall_stats", {})
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
330
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
331 # Define exclusions
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
332 if output_type == "binary":
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
333 exclude = {"per_class_stats", "precision_recall_curve", "roc_curve"}
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
334 else:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
335 exclude = {"per_class_stats", "confusion_matrix"}
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
336
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
337 # 1. Get all scalar test_label_stats not excluded
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
338 test_metrics = {}
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
339 for k, v in test_label_stats.items():
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
340 if k in exclude:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
341 continue
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
342 if k == "overall_stats":
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
343 continue
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
344 if isinstance(v, (int, float, str, bool)):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
345 test_metrics[k] = v
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
346
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
347 # 2. Add overall_stats (flattened)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
348 for k, v in overall_stats.items():
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
349 test_metrics[k] = v
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
350
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
351 # 3. Optionally include combined/loss if present and not already
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
352 if "loss" in combined_stats and "loss" not in test_metrics:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
353 test_metrics["loss"] = combined_stats["loss"]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
354 metrics["test"] = test_metrics
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
355 return metrics
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
356
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
357
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
358 def generate_table_row(cells, styles):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
359 """Helper function to generate an HTML table row."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
360 return (
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
361 "<tr>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
362 + "".join(f"<td style='{styles}'>{cell}</td>" for cell in cells)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
363 + "</tr>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
364 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
365
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
366
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
367 # -----------------------------------------
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
368 # 2) MODEL PERFORMANCE (Train/Val/Test) TABLE
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
369 # -----------------------------------------
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
370 def format_stats_table_html(train_stats: dict, test_stats: dict, output_type: str) -> str:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
371 """Formats a combined HTML table for training, validation, and test metrics."""
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
372 all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
373 rows = []
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
374 for metric_key in sorted(all_metrics["training"].keys()):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
375 if (
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
376 metric_key in all_metrics["validation"]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
377 and metric_key in all_metrics["test"]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
378 ):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
379 display_name = METRIC_DISPLAY_NAMES.get(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
380 metric_key,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
381 metric_key.replace("_", " ").title(),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
382 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
383 t = all_metrics["training"].get(metric_key)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
384 v = all_metrics["validation"].get(metric_key)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
385 te = all_metrics["test"].get(metric_key)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
386 if all(x is not None for x in [t, v, te]):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
387 rows.append([display_name, f"{t:.4f}", f"{v:.4f}", f"{te:.4f}"])
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
388
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
389 if not rows:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
390 return "<table><tr><td>No metric values found.</td></tr></table>"
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
391
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
392 html = (
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
393 "<h2 style='text-align: center;'>Model Performance Summary</h2>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
394 "<div style='display: flex; justify-content: center;'>"
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
395 "<table class='performance-summary' style='border-collapse: collapse;'>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
396 "<thead><tr>"
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
397 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
398 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Train</th>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
399 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Validation</th>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
400 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Test</th>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
401 "</tr></thead><tbody>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
402 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
403 for row in rows:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
404 html += generate_table_row(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
405 row,
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
406 "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;",
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
407 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
408 html += "</tbody></table></div><br>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
409 return html
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
410
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
411
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
412 # -------------------------------------------
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
413 # 3) TRAIN/VALIDATION PERFORMANCE SUMMARY TABLE
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
414 # -------------------------------------------
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
415 def format_train_val_stats_table_html(train_stats: dict, test_stats: dict) -> str:
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
416 """Format train/validation metrics into an HTML table."""
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
417 all_metrics = extract_metrics_from_json(train_stats, test_stats, detect_output_type(test_stats))
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
418 rows = []
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
419 for metric_key in sorted(all_metrics["training"].keys()):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
420 if metric_key in all_metrics["validation"]:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
421 display_name = METRIC_DISPLAY_NAMES.get(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
422 metric_key,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
423 metric_key.replace("_", " ").title(),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
424 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
425 t = all_metrics["training"].get(metric_key)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
426 v = all_metrics["validation"].get(metric_key)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
427 if t is not None and v is not None:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
428 rows.append([display_name, f"{t:.4f}", f"{v:.4f}"])
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
429
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
430 if not rows:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
431 return "<table><tr><td>No metric values found for Train/Validation.</td></tr></table>"
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
432
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
433 html = (
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
434 "<h2 style='text-align: center;'>Train/Validation Performance Summary</h2>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
435 "<div style='display: flex; justify-content: center;'>"
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
436 "<table class='performance-summary' style='border-collapse: collapse;'>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
437 "<thead><tr>"
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
438 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
439 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Train</th>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
440 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Validation</th>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
441 "</tr></thead><tbody>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
442 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
443 for row in rows:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
444 html += generate_table_row(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
445 row,
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
446 "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;",
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
447 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
448 html += "</tbody></table></div><br>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
449 return html
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
450
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
451
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
452 # -----------------------------------------
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
453 # 4) TEST‐ONLY PERFORMANCE SUMMARY TABLE
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
454 # -----------------------------------------
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
455 def format_test_merged_stats_table_html(
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
456 test_metrics: Dict[str, Any], output_type: str
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
457 ) -> str:
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
458 """Format test metrics into an HTML table."""
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
459 rows = []
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
460 for key in sorted(test_metrics.keys()):
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
461 display_name = METRIC_DISPLAY_NAMES.get(key, key.replace("_", " ").title())
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
462 value = test_metrics[key]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
463 if value is not None:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
464 rows.append([display_name, f"{value:.4f}"])
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
465
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
466 if not rows:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
467 return "<table><tr><td>No test metric values found.</td></tr></table>"
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
468
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
469 html = (
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
470 "<h2 style='text-align: center;'>Test Performance Summary</h2>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
471 "<div style='display: flex; justify-content: center;'>"
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
472 "<table class='performance-summary' style='border-collapse: collapse;'>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
473 "<thead><tr>"
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
474 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
475 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Test</th>"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
476 "</tr></thead><tbody>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
477 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
478 for row in rows:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
479 html += generate_table_row(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
480 row,
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
481 "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;",
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
482 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
483 html += "</tbody></table></div><br>"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
484 return html
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
485
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
486
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
487 def split_data_0_2(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
488 df: pd.DataFrame,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
489 split_column: str,
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
490 validation_size: float = 0.1,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
491 random_state: int = 42,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
492 label_column: Optional[str] = None,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
493 ) -> pd.DataFrame:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
494 """Given a DataFrame whose split_column only contains {0,2}, re-assign a portion of the 0s to become 1s (validation)."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
495 out = df.copy()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
496 out[split_column] = pd.to_numeric(out[split_column], errors="coerce").astype(int)
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
497
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
498 idx_train = out.index[out[split_column] == 0].tolist()
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
499
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
500 if not idx_train:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
501 logger.info("No rows with split=0; nothing to do.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
502 return out
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
503 stratify_arr = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
504 if label_column and label_column in out.columns:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
505 label_counts = out.loc[idx_train, label_column].value_counts()
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
506 if label_counts.size > 1:
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
507 # Force stratify even with fewer samples - adjust validation_size if needed
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
508 min_samples_per_class = label_counts.min()
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
509 if min_samples_per_class * validation_size < 1:
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
510 # Adjust validation_size to ensure at least 1 sample per class, but do not exceed original validation_size
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
511 adjusted_validation_size = min(
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
512 validation_size, 1.0 / min_samples_per_class
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
513 )
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
514 if adjusted_validation_size != validation_size:
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
515 validation_size = adjusted_validation_size
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
516 logger.info(
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
517 f"Adjusted validation_size to {validation_size:.3f} to ensure at least one sample per class in validation"
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
518 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
519 stratify_arr = out.loc[idx_train, label_column]
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
520 logger.info("Using stratified split for validation set")
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
521 else:
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
522 logger.warning("Only one label class found; cannot stratify")
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
523 if validation_size <= 0:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
524 logger.info("validation_size <= 0; keeping all as train.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
525 return out
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
526 if validation_size >= 1:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
527 logger.info("validation_size >= 1; moving all train → validation.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
528 out.loc[idx_train, split_column] = 1
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
529 return out
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
530 # Always try stratified split first
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
531 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
532 train_idx, val_idx = train_test_split(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
533 idx_train,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
534 test_size=validation_size,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
535 random_state=random_state,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
536 stratify=stratify_arr,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
537 )
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
538 logger.info("Successfully applied stratified split")
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
539 except ValueError as e:
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
540 logger.warning(f"Stratified split failed ({e}); falling back to random split.")
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
541 train_idx, val_idx = train_test_split(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
542 idx_train,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
543 test_size=validation_size,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
544 random_state=random_state,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
545 stratify=None,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
546 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
547 out.loc[train_idx, split_column] = 0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
548 out.loc[val_idx, split_column] = 1
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
549 out[split_column] = out[split_column].astype(int)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
550 return out
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
551
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
552
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
553 def create_stratified_random_split(
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
554 df: pd.DataFrame,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
555 split_column: str,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
556 split_probabilities: list = [0.7, 0.1, 0.2],
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
557 random_state: int = 42,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
558 label_column: Optional[str] = None,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
559 ) -> pd.DataFrame:
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
560 """Create a stratified random split when no split column exists."""
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
561 out = df.copy()
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
562
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
563 # initialize split column
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
564 out[split_column] = 0
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
565
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
566 if not label_column or label_column not in out.columns:
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
567 logger.warning(
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
568 "No label column found; using random split without stratification"
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
569 )
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
570 # fall back to simple random assignment
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
571 indices = out.index.tolist()
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
572 np.random.seed(random_state)
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
573 np.random.shuffle(indices)
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
574
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
575 n_total = len(indices)
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
576 n_train = int(n_total * split_probabilities[0])
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
577 n_val = int(n_total * split_probabilities[1])
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
578
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
579 out.loc[indices[:n_train], split_column] = 0
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
580 out.loc[indices[n_train:n_train + n_val], split_column] = 1
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
581 out.loc[indices[n_train + n_val:], split_column] = 2
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
582
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
583 return out.astype({split_column: int})
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
584
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
585 # check if stratification is possible
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
586 label_counts = out[label_column].value_counts()
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
587 min_samples_per_class = label_counts.min()
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
588
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
589 # ensure we have enough samples for stratification:
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
590 # Each class must have at least as many samples as the number of splits,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
591 # so that each split can receive at least one sample per class.
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
592 min_samples_required = len(split_probabilities)
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
593 if min_samples_per_class < min_samples_required:
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
594 logger.warning(
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
595 f"Insufficient samples per class for stratification (min: {min_samples_per_class}, required: {min_samples_required}); using random split"
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
596 )
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
597 # fall back to simple random assignment
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
598 indices = out.index.tolist()
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
599 np.random.seed(random_state)
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
600 np.random.shuffle(indices)
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
601
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
602 n_total = len(indices)
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
603 n_train = int(n_total * split_probabilities[0])
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
604 n_val = int(n_total * split_probabilities[1])
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
605
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
606 out.loc[indices[:n_train], split_column] = 0
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
607 out.loc[indices[n_train:n_train + n_val], split_column] = 1
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
608 out.loc[indices[n_train + n_val:], split_column] = 2
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
609
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
610 return out.astype({split_column: int})
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
611
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
612 logger.info("Using stratified random split for train/validation/test sets")
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
613
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
614 # first split: separate test set
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
615 train_val_idx, test_idx = train_test_split(
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
616 out.index.tolist(),
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
617 test_size=split_probabilities[2],
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
618 random_state=random_state,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
619 stratify=out[label_column],
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
620 )
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
621
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
622 # second split: separate training and validation from remaining data
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
623 val_size_adjusted = split_probabilities[1] / (
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
624 split_probabilities[0] + split_probabilities[1]
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
625 )
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
626 train_idx, val_idx = train_test_split(
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
627 train_val_idx,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
628 test_size=val_size_adjusted,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
629 random_state=random_state,
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
630 stratify=out.loc[train_val_idx, label_column] if label_column and label_column in out.columns else None,
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
631 )
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
632
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
633 # assign split values
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
634 out.loc[train_idx, split_column] = 0
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
635 out.loc[val_idx, split_column] = 1
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
636 out.loc[test_idx, split_column] = 2
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
637
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
638 logger.info("Successfully applied stratified random split")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
639 logger.info(
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
640 f"Split counts: Train={len(train_idx)}, Val={len(val_idx)}, Test={len(test_idx)}"
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
641 )
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
642 return out.astype({split_column: int})
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
643
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
644
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
645 class Backend(Protocol):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
646 """Interface for a machine learning backend."""
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
647
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
648 def prepare_config(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
649 self,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
650 config_params: Dict[str, Any],
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
651 split_config: Dict[str, Any],
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
652 ) -> str:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
653 ...
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
654
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
655 def run_experiment(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
656 self,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
657 dataset_path: Path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
658 config_path: Path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
659 output_dir: Path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
660 random_seed: int,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
661 ) -> None:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
662 ...
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
663
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
664 def generate_plots(self, output_dir: Path) -> None:
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
665 ...
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
666
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
667 def generate_html_report(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
668 self,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
669 title: str,
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
670 output_dir: str,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
671 config: Dict[str, Any],
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
672 split_info: str,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
673 ) -> Path:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
674 ...
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
675
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
676
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
677 class LudwigDirectBackend:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
678 """Backend for running Ludwig experiments directly via the internal experiment_cli function."""
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
679
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
680 def _detect_image_dimensions(self, image_zip_path: str) -> Tuple[int, int]:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
681 """Detect image dimensions from the first image in the dataset."""
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
682 try:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
683 import zipfile
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
684 from PIL import Image
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
685 import io
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
686
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
687 # Check if image_zip is provided
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
688 if not image_zip_path:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
689 logger.warning("No image zip provided, using default 224x224")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
690 return 224, 224
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
691
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
692 # Extract first image to detect dimensions
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
693 with zipfile.ZipFile(image_zip_path, 'r') as z:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
694 image_files = [f for f in z.namelist() if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
695 if not image_files:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
696 logger.warning("No image files found in zip, using default 224x224")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
697 return 224, 224
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
698
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
699 # Check first image
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
700 with z.open(image_files[0]) as f:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
701 img = Image.open(io.BytesIO(f.read()))
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
702 width, height = img.size
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
703 logger.info(f"Detected image dimensions: {width}x{height}")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
704 return height, width # Return as (height, width) to match encoder config
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
705
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
706 except Exception as e:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
707 logger.warning(f"Error detecting image dimensions: {e}, using default 224x224")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
708 return 224, 224
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
709
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
710 def prepare_config(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
711 self,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
712 config_params: Dict[str, Any],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
713 split_config: Dict[str, Any],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
714 ) -> str:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
715 logger.info("LudwigDirectBackend: Preparing YAML configuration.")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
716
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
717 model_name = config_params.get("model_name", "resnet18")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
718 use_pretrained = config_params.get("use_pretrained", False)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
719 fine_tune = config_params.get("fine_tune", False)
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
720 if use_pretrained:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
721 trainable = bool(fine_tune)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
722 else:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
723 trainable = True
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
724 epochs = config_params.get("epochs", 10)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
725 batch_size = config_params.get("batch_size")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
726 num_processes = config_params.get("preprocessing_num_processes", 1)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
727 early_stop = config_params.get("early_stop", None)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
728 learning_rate = config_params.get("learning_rate")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
729 learning_rate = "auto" if learning_rate is None else float(learning_rate)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
730 raw_encoder = MODEL_ENCODER_TEMPLATES.get(model_name, model_name)
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
731
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
732 # --- MetaFormer detection and config logic ---
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
733 def _is_metaformer(name: str) -> bool:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
734 return isinstance(name, str) and name.startswith(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
735 (
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
736 "identityformer_",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
737 "randformer_",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
738 "poolformerv2_",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
739 "convformer_",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
740 "caformer_",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
741 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
742 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
743
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
744 # Check if this is a MetaFormer model (either direct name or in custom_model)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
745 is_metaformer = (
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
746 _is_metaformer(model_name)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
747 or (isinstance(raw_encoder, dict) and "custom_model" in raw_encoder and _is_metaformer(raw_encoder["custom_model"]))
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
748 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
749
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
750 metaformer_resize: Optional[Tuple[int, int]] = None
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
751 metaformer_channels = 3
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
752
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
753 if is_metaformer:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
754 # Handle MetaFormer models
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
755 custom_model = None
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
756 if isinstance(raw_encoder, dict) and "custom_model" in raw_encoder:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
757 custom_model = raw_encoder["custom_model"]
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
758 else:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
759 custom_model = model_name
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
760
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
761 logger.info(f"DETECTED MetaFormer model: {custom_model}")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
762 cfg_channels, cfg_height, cfg_width = 3, 224, 224
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
763 if META_DEFAULT_CFGS:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
764 model_cfg = META_DEFAULT_CFGS.get(custom_model, {})
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
765 input_size = model_cfg.get("input_size")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
766 if isinstance(input_size, (list, tuple)) and len(input_size) == 3:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
767 cfg_channels, cfg_height, cfg_width = (
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
768 int(input_size[0]),
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
769 int(input_size[1]),
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
770 int(input_size[2]),
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
771 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
772
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
773 target_height, target_width = cfg_height, cfg_width
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
774 resize_value = config_params.get("image_resize")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
775 if resize_value and resize_value != "original":
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
776 try:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
777 dimensions = resize_value.split("x")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
778 if len(dimensions) == 2:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
779 target_height, target_width = int(dimensions[0]), int(dimensions[1])
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
780 if target_height <= 0 or target_width <= 0:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
781 raise ValueError(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
782 f"Image resize must be positive integers, received {resize_value}."
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
783 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
784 logger.info(f"MetaFormer explicit resize: {target_height}x{target_width}")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
785 else:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
786 raise ValueError(resize_value)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
787 except (ValueError, IndexError):
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
788 logger.warning(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
789 "Invalid image resize format '%s'; falling back to model default %sx%s",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
790 resize_value,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
791 cfg_height,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
792 cfg_width,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
793 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
794 target_height, target_width = cfg_height, cfg_width
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
795 else:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
796 image_zip_path = config_params.get("image_zip", "")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
797 detected_height, detected_width = self._detect_image_dimensions(image_zip_path)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
798 if use_pretrained:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
799 if (detected_height, detected_width) != (cfg_height, cfg_width):
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
800 logger.info(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
801 "MetaFormer pretrained weights expect %sx%s; resizing from detected %sx%s",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
802 cfg_height,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
803 cfg_width,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
804 detected_height,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
805 detected_width,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
806 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
807 else:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
808 target_height, target_width = detected_height, detected_width
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
809 if target_height <= 0 or target_width <= 0:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
810 raise ValueError(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
811 f"Invalid detected image dimensions for MetaFormer: {target_height}x{target_width}."
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
812 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
813
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
814 metaformer_channels = cfg_channels
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
815 metaformer_resize = (target_height, target_width)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
816
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
817 encoder_config = {
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
818 "type": "stacked_cnn",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
819 "height": target_height,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
820 "width": target_width,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
821 "num_channels": metaformer_channels,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
822 "output_size": 128,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
823 "use_pretrained": use_pretrained,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
824 "trainable": trainable,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
825 "custom_model": custom_model,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
826 }
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
827
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
828 elif isinstance(raw_encoder, dict):
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
829 # Handle image resize for regular encoders
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
830 # Note: Standard encoders like ResNet don't support height/width parameters
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
831 # Resize will be handled at the preprocessing level by Ludwig
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
832 if config_params.get("image_resize") and config_params["image_resize"] != "original":
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
833 logger.info(f"Resize requested: {config_params['image_resize']} for standard encoder. Resize will be handled at preprocessing level.")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
834
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
835 encoder_config = {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
836 **raw_encoder,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
837 "use_pretrained": use_pretrained,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
838 "trainable": trainable,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
839 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
840 else:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
841 encoder_config = {"type": raw_encoder}
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
842
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
843 batch_size_cfg = batch_size or "auto"
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
844
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
845 label_column_path = config_params.get("label_column_data_path")
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
846 label_series = None
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
847 if label_column_path is not None and Path(label_column_path).exists():
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
848 try:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
849 label_series = pd.read_csv(label_column_path)[LABEL_COLUMN_NAME]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
850 except Exception as e:
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
851 logger.warning(f"Could not read label column for task detection: {e}")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
852
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
853 if (
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
854 label_series is not None
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
855 and ptypes.is_numeric_dtype(label_series.dtype)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
856 and label_series.nunique() > 10
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
857 ):
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
858 task_type = "regression"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
859 else:
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
860 task_type = "classification"
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
861
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
862 config_params["task_type"] = task_type
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
863
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
864 image_feat: Dict[str, Any] = {
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
865 "name": IMAGE_PATH_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
866 "type": "image",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
867 }
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
868 # Set preprocessing dimensions FIRST for MetaFormer models
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
869 if is_metaformer:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
870 if metaformer_resize is None:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
871 metaformer_resize = (224, 224)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
872 height, width = metaformer_resize
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
873
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
874 # CRITICAL: Set preprocessing dimensions FIRST for MetaFormer models
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
875 # This is essential for MetaFormer models to work properly
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
876 if "preprocessing" not in image_feat:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
877 image_feat["preprocessing"] = {}
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
878 image_feat["preprocessing"]["height"] = height
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
879 image_feat["preprocessing"]["width"] = width
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
880 # Use infer_image_dimensions=True to allow Ludwig to read images for validation
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
881 # but set explicit max dimensions to control the output size
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
882 image_feat["preprocessing"]["infer_image_dimensions"] = True
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
883 image_feat["preprocessing"]["infer_image_max_height"] = height
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
884 image_feat["preprocessing"]["infer_image_max_width"] = width
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
885 image_feat["preprocessing"]["num_channels"] = metaformer_channels
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
886 image_feat["preprocessing"]["resize_method"] = "interpolate" # Use interpolation for better quality
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
887 image_feat["preprocessing"]["standardize_image"] = "imagenet1k" # Use ImageNet standardization
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
888 # Force Ludwig to respect our dimensions by setting additional parameters
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
889 image_feat["preprocessing"]["requires_equal_dimensions"] = False
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
890 logger.info(f"Set preprocessing dimensions for MetaFormer: {height}x{width} (infer_dimensions=True with max dimensions to allow validation)")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
891 # Now set the encoder configuration
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
892 image_feat["encoder"] = encoder_config
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
893
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
894 if config_params.get("augmentation") is not None:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
895 image_feat["augmentation"] = config_params["augmentation"]
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
896
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
897 # Add resize configuration for standard encoders (ResNet, etc.)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
898 # FIXED: MetaFormer models now respect user dimensions completely
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
899 # Previously there was a double resize issue where MetaFormer would force 224x224
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
900 # Now both MetaFormer and standard encoders respect user's resize choice
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
901 if (not is_metaformer) and config_params.get("image_resize") and config_params["image_resize"] != "original":
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
902 try:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
903 dimensions = config_params["image_resize"].split("x")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
904 if len(dimensions) == 2:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
905 height, width = int(dimensions[0]), int(dimensions[1])
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
906 if height <= 0 or width <= 0:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
907 raise ValueError(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
908 f"Image resize must be positive integers, received {config_params['image_resize']}."
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
909 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
910
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
911 # Add resize to preprocessing for standard encoders
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
912 if "preprocessing" not in image_feat:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
913 image_feat["preprocessing"] = {}
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
914 image_feat["preprocessing"]["height"] = height
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
915 image_feat["preprocessing"]["width"] = width
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
916 # Use infer_image_dimensions=True to allow Ludwig to read images for validation
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
917 # but set explicit max dimensions to control the output size
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
918 image_feat["preprocessing"]["infer_image_dimensions"] = True
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
919 image_feat["preprocessing"]["infer_image_max_height"] = height
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
920 image_feat["preprocessing"]["infer_image_max_width"] = width
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
921 logger.info(f"Added resize preprocessing: {height}x{width} for standard encoder with infer_image_dimensions=True and max dimensions")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
922 except (ValueError, IndexError):
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
923 logger.warning(f"Invalid image resize format: {config_params['image_resize']}, skipping resize preprocessing")
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
924 if task_type == "regression":
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
925 output_feat = {
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
926 "name": LABEL_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
927 "type": "number",
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
928 "decoder": {"type": "regressor", "input_size": 1},
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
929 "loss": {"type": "mean_squared_error"},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
930 "evaluation": {
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
931 "metrics": [
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
932 "mean_squared_error",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
933 "mean_absolute_error",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
934 "r2",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
935 ]
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
936 },
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
937 }
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
938 val_metric = config_params.get("validation_metric", "mean_squared_error")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
939
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
940 else:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
941 num_unique_labels = (
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
942 label_series.nunique() if label_series is not None else 2
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
943 )
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
944 output_type = "binary" if num_unique_labels == 2 else "category"
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
945 # Determine if this is regression or classification based on label type
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
946 is_regression = (
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
947 label_series is not None
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
948 and ptypes.is_numeric_dtype(label_series.dtype)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
949 and label_series.nunique() > 10
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
950 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
951
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
952 if is_regression:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
953 output_feat = {
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
954 "name": LABEL_COLUMN_NAME,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
955 "type": "number",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
956 "decoder": {"type": "regressor", "input_size": 1},
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
957 "loss": {"type": "mean_squared_error"},
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
958 }
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
959 else:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
960 if num_unique_labels == 2:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
961 output_feat = {
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
962 "name": LABEL_COLUMN_NAME,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
963 "type": "binary",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
964 "decoder": {"type": "classifier", "input_size": 1},
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
965 "loss": {"type": "softmax_cross_entropy"},
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
966 }
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
967 else:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
968 output_feat = {
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
969 "name": LABEL_COLUMN_NAME,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
970 "type": "category",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
971 "decoder": {"type": "classifier", "input_size": num_unique_labels},
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
972 "loss": {"type": "softmax_cross_entropy"},
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
973 }
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
974 if output_type == "binary" and config_params.get("threshold") is not None:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
975 output_feat["threshold"] = float(config_params["threshold"])
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
976 val_metric = None
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
977
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
978 conf: Dict[str, Any] = {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
979 "model_type": "ecd",
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
980 "input_features": [image_feat],
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
981 "output_features": [output_feat],
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
982 "combiner": {"type": "concat"},
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
983 "trainer": {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
984 "epochs": epochs,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
985 "early_stop": early_stop,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
986 "batch_size": batch_size_cfg,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
987 "learning_rate": learning_rate,
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
988 # only set validation_metric for regression
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
989 **({"validation_metric": val_metric} if val_metric else {}),
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
990 },
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
991 "preprocessing": {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
992 "split": split_config,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
993 "num_processes": num_processes,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
994 "in_memory": False,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
995 },
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
996 }
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
997
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
998 logger.debug("LudwigDirectBackend: Config dict built.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
999 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1000 yaml_str = yaml.dump(conf, sort_keys=False, indent=2)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1001 logger.info("LudwigDirectBackend: YAML config generated.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1002 return yaml_str
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1003 except Exception:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1004 logger.error(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1005 "LudwigDirectBackend: Failed to serialize YAML.",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1006 exc_info=True,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1007 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1008 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1009
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1010 def run_experiment(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1011 self,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1012 dataset_path: Path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1013 config_path: Path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1014 output_dir: Path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1015 random_seed: int = 42,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1016 ) -> None:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1017 """Invoke Ludwig's internal experiment_cli function to run the experiment."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1018 logger.info("LudwigDirectBackend: Starting experiment execution.")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1019
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1020 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1021 from ludwig.experiment import experiment_cli
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1022 except ImportError as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1023 logger.error(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1024 "LudwigDirectBackend: Could not import experiment_cli.",
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1025 exc_info=True,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1026 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1027 raise RuntimeError("Ludwig import failed.") from e
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1028
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1029 output_dir.mkdir(parents=True, exist_ok=True)
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1030
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1031 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1032 experiment_cli(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1033 dataset=str(dataset_path),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1034 config=str(config_path),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1035 output_directory=str(output_dir),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1036 random_seed=random_seed,
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1037 skip_preprocessing=True,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1038 )
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1039 logger.info(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1040 f"LudwigDirectBackend: Experiment completed. Results in {output_dir}"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1041 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1042 except TypeError as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1043 logger.error(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1044 "LudwigDirectBackend: Argument mismatch in experiment_cli call.",
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1045 exc_info=True,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1046 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1047 raise RuntimeError("Ludwig argument error.") from e
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1048 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1049 logger.error(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1050 "LudwigDirectBackend: Experiment execution error.",
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1051 exc_info=True,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1052 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1053 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1054
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1055 def get_training_process(self, output_dir) -> Optional[Dict[str, Any]]:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1056 """Retrieve the learning rate used in the most recent Ludwig run."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1057 output_dir = Path(output_dir)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1058 exp_dirs = sorted(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1059 output_dir.glob("experiment_run*"),
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1060 key=lambda p: p.stat().st_mtime,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1061 )
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1062
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1063 if not exp_dirs:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1064 logger.warning(f"No experiment run directories found in {output_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1065 return None
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1066
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1067 progress_file = exp_dirs[-1] / "model" / "training_progress.json"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1068 if not progress_file.exists():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1069 logger.warning(f"No training_progress.json found in {progress_file}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1070 return None
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1071
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1072 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1073 with progress_file.open("r", encoding="utf-8") as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1074 data = json.load(f)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1075 return {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1076 "learning_rate": data.get("learning_rate"),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1077 "batch_size": data.get("batch_size"),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1078 "epoch": data.get("epoch"),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1079 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1080 except Exception as e:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1081 logger.warning(f"Failed to read training progress info: {e}")
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1082 return {}
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1083
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1084 def convert_parquet_to_csv(self, output_dir: Path):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1085 """Convert the predictions Parquet file to CSV."""
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1086 output_dir = Path(output_dir)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1087 exp_dirs = sorted(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1088 output_dir.glob("experiment_run*"),
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1089 key=lambda p: p.stat().st_mtime,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1090 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1091 if not exp_dirs:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1092 logger.warning(f"No experiment run dirs found in {output_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1093 return
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1094 exp_dir = exp_dirs[-1]
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1095 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1096 csv_path = exp_dir / "predictions.csv"
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1097
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1098 # Check if parquet file exists before trying to convert
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1099 if not parquet_path.exists():
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1100 logger.info(f"Predictions parquet file not found at {parquet_path}, skipping conversion")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1101 return
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1102
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1103 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1104 df = pd.read_parquet(parquet_path)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1105 df.to_csv(csv_path, index=False)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1106 logger.info(f"Converted Parquet to CSV: {csv_path}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1107 except Exception as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1108 logger.error(f"Error converting Parquet to CSV: {e}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1109
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1110 def generate_plots(self, output_dir: Path) -> None:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1111 """Generate all registered Ludwig visualizations for the latest experiment run."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1112 logger.info("Generating all Ludwig visualizations…")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1113
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1114 test_plots = {
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1115 "compare_performance",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1116 "compare_classifiers_performance_from_prob",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1117 "compare_classifiers_performance_from_pred",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1118 "compare_classifiers_performance_changing_k",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1119 "compare_classifiers_multiclass_multimetric",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1120 "compare_classifiers_predictions",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1121 "confidence_thresholding_2thresholds_2d",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1122 "confidence_thresholding_2thresholds_3d",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1123 "confidence_thresholding",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1124 "confidence_thresholding_data_vs_acc",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1125 "binary_threshold_vs_metric",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1126 "roc_curves",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1127 "roc_curves_from_test_statistics",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1128 "calibration_1_vs_all",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1129 "calibration_multiclass",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1130 "confusion_matrix",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1131 "frequency_vs_f1",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1132 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1133 train_plots = {
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1134 "learning_curves",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1135 "compare_classifiers_performance_subset",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1136 }
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1137
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1138 output_dir = Path(output_dir)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1139 exp_dirs = sorted(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1140 output_dir.glob("experiment_run*"),
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1141 key=lambda p: p.stat().st_mtime,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1142 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1143 if not exp_dirs:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1144 logger.warning(f"No experiment run dirs found in {output_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1145 return
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1146 exp_dir = exp_dirs[-1]
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1147
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1148 viz_dir = exp_dir / "visualizations"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1149 viz_dir.mkdir(exist_ok=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1150 train_viz = viz_dir / "train"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1151 test_viz = viz_dir / "test"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1152 train_viz.mkdir(parents=True, exist_ok=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1153 test_viz.mkdir(parents=True, exist_ok=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1154
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1155 def _check(p: Path) -> Optional[str]:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1156 return str(p) if p.exists() else None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1157
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1158 training_stats = _check(exp_dir / "training_statistics.json")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1159 test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1160 probs_path = _check(exp_dir / PREDICTIONS_PARQUET_FILE_NAME)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1161 gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME)
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1162
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1163 dataset_path = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1164 split_file = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1165 desc = exp_dir / DESCRIPTION_FILE_NAME
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1166 if desc.exists():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1167 with open(desc, "r") as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1168 cfg = json.load(f)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1169 dataset_path = _check(Path(cfg.get("dataset", "")))
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1170 split_file = _check(Path(get_split_path(cfg.get("dataset", ""))))
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1171
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1172 output_feature = ""
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1173 if desc.exists():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1174 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1175 output_feature = cfg["config"]["output_features"][0]["name"]
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1176 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1177 pass
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1178 if not output_feature and test_stats:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1179 with open(test_stats, "r") as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1180 stats = json.load(f)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1181 output_feature = next(iter(stats.keys()), "")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1182
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1183 viz_registry = get_visualizations_registry()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1184 for viz_name, viz_func in viz_registry.items():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1185 if viz_name in train_plots:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1186 viz_dir_plot = train_viz
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1187 elif viz_name in test_plots:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1188 viz_dir_plot = test_viz
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1189 else:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1190 continue
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1191
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1192 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1193 viz_func(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1194 training_statistics=[training_stats] if training_stats else [],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1195 test_statistics=[test_stats] if test_stats else [],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1196 probabilities=[probs_path] if probs_path else [],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1197 output_feature_name=output_feature,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1198 ground_truth_split=2,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1199 top_n_classes=[0],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1200 top_k=3,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1201 ground_truth_metadata=gt_metadata,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1202 ground_truth=dataset_path,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1203 split_file=split_file,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1204 output_directory=str(viz_dir_plot),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1205 normalize=False,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1206 file_format="png",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1207 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1208 logger.info(f"✔ Generated {viz_name}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1209 except Exception as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1210 logger.warning(f"✘ Skipped {viz_name}: {e}")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1211
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1212 logger.info(f"All visualizations written to {viz_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1213
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1214 def generate_html_report(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1215 self,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1216 title: str,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1217 output_dir: str,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1218 config: dict,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1219 split_info: str,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1220 ) -> Path:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1221 """Assemble an HTML report from visualizations under train_val/ and test/ folders."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1222 cwd = Path.cwd()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1223 report_name = title.lower().replace(" ", "_") + "_report.html"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1224 report_path = cwd / report_name
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1225 output_dir = Path(output_dir)
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1226 output_type = None
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1227
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1228 exp_dirs = sorted(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1229 output_dir.glob("experiment_run*"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1230 key=lambda p: p.stat().st_mtime,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1231 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1232 if not exp_dirs:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1233 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1234 exp_dir = exp_dirs[-1]
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1235
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1236 base_viz_dir = exp_dir / "visualizations"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1237 train_viz_dir = base_viz_dir / "train"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1238 test_viz_dir = base_viz_dir / "test"
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1239
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1240 html = get_html_template()
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1241
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1242 # Extra CSS & JS: center Plotly and enable CSV download for predictions table
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1243 html += """
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1244 <style>
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1245 /* Center Plotly figures (both wrapper and native classes) */
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1246 .plotly-center { display: flex; justify-content: center; }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1247 .plotly-center .plotly-graph-div, .plotly-center .js-plotly-plot { margin: 0 auto !important; }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1248 .js-plotly-plot, .plotly-graph-div { margin-left: auto !important; margin-right: auto !important; }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1249
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1250 /* Download button for predictions table */
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1251 .download-btn {
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1252 padding: 8px 12px;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1253 border: 1px solid #4CAF50;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1254 background: #4CAF50;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1255 color: white;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1256 border-radius: 6px;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1257 cursor: pointer;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1258 }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1259 .download-btn:hover { filter: brightness(0.95); }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1260 .preds-controls {
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1261 display: flex;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1262 justify-content: flex-end;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1263 gap: 8px;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1264 margin: 8px 0;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1265 }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1266 </style>
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1267 <script>
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1268 function tableToCSV(table){
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1269 const rows = Array.from(table.querySelectorAll('tr'));
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1270 return rows.map(row =>
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1271 Array.from(row.querySelectorAll('th,td')).map(cell => {
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1272 let text = cell.innerText.replace(/\\r?\\n|\\r/g,' ').trim();
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1273 if (text.includes('"') || text.includes(',')) {
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1274 text = '"' + text.replace(/"/g,'""') + '"';
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1275 }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1276 return text;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1277 }).join(',')
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1278 ).join('\\n');
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1279 }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1280 document.addEventListener('DOMContentLoaded', function(){
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1281 const btn = document.getElementById('downloadPredsCsv');
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1282 if(btn){
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1283 btn.addEventListener('click', function(){
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1284 const tbl = document.querySelector('.predictions-table');
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1285 if(!tbl){ alert('Predictions table not found.'); return; }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1286 const csv = tableToCSV(tbl);
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1287 const blob = new Blob([csv], {type: 'text/csv;charset=utf-8;'});
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1288 const url = URL.createObjectURL(blob);
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1289 const a = document.createElement('a');
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1290 a.href = url;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1291 a.download = 'ground_truth_vs_predictions.csv';
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1292 document.body.appendChild(a);
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1293 a.click();
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1294 document.body.removeChild(a);
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1295 URL.revokeObjectURL(url);
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1296 });
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1297 }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1298 });
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1299 </script>
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1300 """
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1301 html += f"<h1>{title}</h1>"
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1302
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1303 metrics_html = ""
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1304 train_val_metrics_html = ""
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1305 test_metrics_html = ""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1306 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1307 train_stats_path = exp_dir / "training_statistics.json"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1308 test_stats_path = exp_dir / TEST_STATISTICS_FILE_NAME
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1309 if train_stats_path.exists() and test_stats_path.exists():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1310 with open(train_stats_path) as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1311 train_stats = json.load(f)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1312 with open(test_stats_path) as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1313 test_stats = json.load(f)
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1314 output_type = detect_output_type(test_stats)
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1315 metrics_html = format_stats_table_html(train_stats, test_stats, output_type)
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1316 train_val_metrics_html = format_train_val_stats_table_html(
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1317 train_stats, test_stats
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1318 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1319 test_metrics_html = format_test_merged_stats_table_html(
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1320 extract_metrics_from_json(train_stats, test_stats, output_type)[
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1321 "test"
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1322 ], output_type
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1323 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1324 except Exception as e:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1325 logger.warning(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1326 f"Could not load stats for HTML report: {type(e).__name__}: {e}"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1327 )
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1328
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1329 config_html = ""
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1330 training_progress = self.get_training_process(output_dir)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1331 try:
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1332 config_html = format_config_table_html(
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1333 config, split_info, training_progress, output_type
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1334 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1335 except Exception as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1336 logger.warning(f"Could not load config for HTML report: {e}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1337
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1338 # ---------- image rendering with exclusions ----------
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1339 def render_img_section(
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1340 title: str,
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1341 dir_path: Path,
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1342 output_type: str = None,
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1343 exclude_names: Optional[set] = None,
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1344 ) -> str:
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1345 if not dir_path.exists():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1346 return f"<h2>{title}</h2><p><em>Directory not found.</em></p>"
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1347
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1348 exclude_names = exclude_names or set()
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1349
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1350 imgs = list(dir_path.glob("*.png"))
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1351
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1352 # Exclude ROC curves and standard confusion matrices (keep only entropy version)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1353 default_exclude = {
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1354 # "roc_curves.png", # Remove ROC curves from test tab
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1355 "confusion_matrix__label_top5.png", # Remove standard confusion matrix
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1356 "confusion_matrix__label_top10.png", # Remove duplicate
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1357 "confusion_matrix__label_top6.png", # Remove duplicate
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1358 "confusion_matrix_entropy__label_top10.png", # Keep only top5
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1359 "confusion_matrix_entropy__label_top6.png", # Keep only top5
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1360 }
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1361
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1362 imgs = [
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1363 img
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1364 for img in imgs
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1365 if img.name not in default_exclude
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1366 and img.name not in exclude_names
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1367 ]
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1368
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1369 if not imgs:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1370 return f"<h2>{title}</h2><p><em>No plots found.</em></p>"
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1371
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1372 # Sort images by name for consistent ordering (works with string and numeric labels)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1373 imgs = sorted(imgs, key=lambda x: x.name)
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1374
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1375 html_section = ""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1376 for img in imgs:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1377 b64 = encode_image_to_base64(str(img))
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1378 img_title = img.stem.replace("_", " ").title()
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1379 html_section += (
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1380 f"<h2 style='text-align: center;'>{img_title}</h2>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1381 f'<div class="plot" style="margin-bottom:20px;text-align:center;">'
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1382 f'<img src="data:image/png;base64,{b64}" '
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1383 f'style="max-width:90%;max-height:600px;border:1px solid #ddd;" />'
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1384 f"</div>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1385 )
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1386 return html_section
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1387
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1388 tab1_content = config_html + metrics_html
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1389
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1390 tab2_content = train_val_metrics_html + render_img_section(
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1391 "Training and Validation Visualizations",
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1392 train_viz_dir,
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1393 output_type,
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1394 exclude_names={
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1395 "compare_classifiers_performance_from_prob.png",
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1396 "roc_curves_from_prediction_statistics.png",
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1397 "precision_recall_curves_from_prediction_statistics.png",
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1398 "precision_recall_curve.png",
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1399 },
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1400 )
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1401
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1402 # --- Predictions vs Ground Truth table (REGRESSION ONLY) ---
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1403 preds_section = ""
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1404 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1405 if output_type == "regression" and parquet_path.exists():
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1406 try:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1407 # 1) load predictions from Parquet
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1408 df_preds = pd.read_parquet(parquet_path).reset_index(drop=True)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1409 # assume the column containing your model's prediction is named "prediction"
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1410 # or contains that substring:
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1411 pred_col = next(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1412 (c for c in df_preds.columns if "prediction" in c.lower()),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1413 None,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1414 )
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1415 if pred_col is None:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1416 raise ValueError("No prediction column found in Parquet output")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1417 df_pred = df_preds[[pred_col]].rename(columns={pred_col: "prediction"})
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1418
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1419 # 2) load ground truth for the test split from prepared CSV
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1420 df_all = pd.read_csv(config["label_column_data_path"])
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1421 df_gt = df_all[df_all[SPLIT_COLUMN_NAME] == 2][
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1422 LABEL_COLUMN_NAME
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1423 ].reset_index(drop=True)
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1424 # 3) concatenate side-by-side
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1425 df_table = pd.concat([df_gt, df_pred], axis=1)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1426 df_table.columns = [LABEL_COLUMN_NAME, "prediction"]
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1427
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1428 # 4) render as HTML
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1429 preds_html = df_table.to_html(index=False, classes="predictions-table")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1430 preds_section = (
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1431 "<h2 style='text-align: center;'>Ground Truth vs. Predictions</h2>"
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1432 "<div class='preds-controls'>"
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1433 "<button id='downloadPredsCsv' class='download-btn'>Download CSV</button>"
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1434 "</div>"
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1435 "<div class='scroll-rows-30' style='overflow-x:auto; overflow-y:auto; max-height:900px; margin-bottom:20px;'>"
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1436 + preds_html
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1437 + "</div>"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1438 )
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1439 except Exception as e:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1440 logger.warning(f"Could not build Predictions vs GT table: {e}")
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1441
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1442 tab3_content = test_metrics_html + preds_section
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1443
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1444 if output_type in ("binary", "category") and test_stats_path.exists():
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1445 try:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1446 interactive_plots = build_classification_plots(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1447 str(test_stats_path),
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1448 str(train_stats_path) if train_stats_path.exists() else None,
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1449 )
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1450 for plot in interactive_plots:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1451 tab3_content += (
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1452 f"<h2 style='text-align: center;'>{plot['title']}</h2>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1453 f"<div class='plotly-center'>{plot['html']}</div>"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1454 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1455 logger.info(f"Generated {len(interactive_plots)} interactive Plotly plots")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1456 except Exception as e:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1457 logger.warning(f"Could not generate Plotly plots: {e}")
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1458
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1459 # Add static TEST PNGs (with default dedupe/exclusions)
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1460 tab3_content += render_img_section(
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1461 "Test Visualizations", test_viz_dir, output_type
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1462 )
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1463
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1464 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1465 modal_html = get_metrics_help_modal()
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1466 html += tabbed_html + modal_html + get_html_closing()
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1467
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1468 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1469 with open(report_path, "w") as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1470 f.write(html)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1471 logger.info(f"HTML report generated at: {report_path}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1472 except Exception as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1473 logger.error(f"Failed to write HTML report: {e}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1474 raise
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1475
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1476 return report_path
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1477
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1478
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1479 class WorkflowOrchestrator:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1480 """Manages the image-classification workflow."""
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1481
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1482 def __init__(self, args: argparse.Namespace, backend: Backend):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1483 self.args = args
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1484 self.backend = backend
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1485 self.temp_dir: Optional[Path] = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1486 self.image_extract_dir: Optional[Path] = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1487 logger.info(f"Orchestrator initialized with backend: {type(backend).__name__}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1488
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1489 def run(self) -> None:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1490 """Execute the full workflow end-to-end."""
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1491 # Delegate to the backend's run_experiment method
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1492 self.backend.run_experiment()
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1493
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1494
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1495 class ImageLearnerCLI:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1496 """Manages the image-classification workflow."""
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1497
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1498 def __init__(self, args: argparse.Namespace, backend: Backend):
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1499 self.args = args
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1500 self.backend = backend
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1501 self.temp_dir: Optional[Path] = None
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1502 self.image_extract_dir: Optional[Path] = None
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1503 logger.info(f"Orchestrator initialized with backend: {type(backend).__name__}")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1504
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1505 def _create_temp_dirs(self) -> None:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1506 """Create temporary output and image extraction directories."""
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1507 try:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1508 self.temp_dir = Path(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1509 tempfile.mkdtemp(dir=self.args.output_dir, prefix=TEMP_DIR_PREFIX)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1510 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1511 self.image_extract_dir = self.temp_dir / "images"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1512 self.image_extract_dir.mkdir()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1513 logger.info(f"Created temp directory: {self.temp_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1514 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1515 logger.error("Failed to create temporary directories", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1516 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1517
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1518 def _extract_images(self) -> None:
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1519 """Extract images into the temp image directory.
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1520 - If a ZIP file is provided, extract it
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1521 - If a directory is provided, copy its contents
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1522 """
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1523 if self.image_extract_dir is None:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1524 raise RuntimeError("Temp image directory not initialized.")
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1525 src = Path(self.args.image_zip)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1526 logger.info(f"Preparing images from {src} → {self.image_extract_dir}")
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1527 try:
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1528 if src.is_dir():
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1529 # copy directory tree
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1530 for root, dirs, files in os.walk(src):
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1531 rel = Path(root).relative_to(src)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1532 target_root = self.image_extract_dir / rel
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1533 target_root.mkdir(parents=True, exist_ok=True)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1534 for fn in files:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1535 shutil.copy2(Path(root) / fn, target_root / fn)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1536 logger.info("Image directory copied.")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1537 else:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1538 with zipfile.ZipFile(src, "r") as z:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1539 z.extractall(self.image_extract_dir)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1540 logger.info("Image extraction complete.")
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1541 except Exception:
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1542 logger.error("Error preparing images", exc_info=True)
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1543 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1544
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1545 def _process_fixed_split(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1546 self, df: pd.DataFrame
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1547 ) -> Tuple[pd.DataFrame, Dict[str, Any], str]:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1548 """Process datasets that already have a split column."""
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1549 unique = set(df[SPLIT_COLUMN_NAME].unique())
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1550 if unique == {0, 2}:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1551 # Split 0/2 detected, create validation set
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1552 df = split_data_0_2(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1553 df=df,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1554 split_column=SPLIT_COLUMN_NAME,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1555 validation_size=self.args.validation_size,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1556 random_state=self.args.random_seed,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1557 label_column=LABEL_COLUMN_NAME,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1558 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1559 split_config = {"type": "fixed", "column": SPLIT_COLUMN_NAME}
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1560 split_info = (
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1561 "Detected a split column (with values 0 and 2) in the input CSV. "
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1562 f"Used this column as a base and reassigned "
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1563 f"{self.args.validation_size * 100:.1f}% "
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1564 "of the training set (originally labeled 0) to validation (labeled 1) using stratified sampling."
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1565 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1566 logger.info("Applied custom 0/2 split.")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1567 elif unique.issubset({0, 1, 2}):
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1568 # Standard 0/1/2 split
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1569 split_config = {"type": "fixed", "column": SPLIT_COLUMN_NAME}
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1570 split_info = (
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1571 "Detected a split column with train(0)/validation(1)/test(2) "
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1572 "values in the input CSV. Used this column as-is."
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1573 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1574 logger.info("Fixed split column detected.")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1575 else:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1576 raise ValueError(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1577 f"Split column contains unexpected values: {unique}. "
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1578 "Expected: {{0,1,2}} or {{0,2}}"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1579 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1580
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1581 return df, split_config, split_info
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1582
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1583 def _prepare_data(self) -> Tuple[Path, Dict[str, Any], str]:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1584 """Load CSV, update image paths, handle splits, and write prepared CSV."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1585 if not self.temp_dir or not self.image_extract_dir:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1586 raise RuntimeError("Temp dirs not initialized before data prep.")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1587
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1588 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1589 df = pd.read_csv(self.args.csv_file)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1590 logger.info(f"Loaded CSV: {self.args.csv_file}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1591 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1592 logger.error("Error loading CSV file", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1593 raise
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1594
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1595 required = {IMAGE_PATH_COLUMN_NAME, LABEL_COLUMN_NAME}
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1596 missing = required - set(df.columns)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1597 if missing:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1598 raise ValueError(f"Missing CSV columns: {', '.join(missing)}")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1599
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1600 try:
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1601 # Use relative paths that Ludwig can resolve from its internal working directory
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1602 df[IMAGE_PATH_COLUMN_NAME] = df[IMAGE_PATH_COLUMN_NAME].apply(
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1603 lambda p: str(Path("images") / p)
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1604 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1605 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1606 logger.error("Error updating image paths", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1607 raise
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1608
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1609 if SPLIT_COLUMN_NAME in df.columns:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1610 df, split_config, split_info = self._process_fixed_split(df)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1611 else:
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1612 logger.info("No split column; creating stratified random split")
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1613 df = create_stratified_random_split(
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1614 df=df,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1615 split_column=SPLIT_COLUMN_NAME,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1616 split_probabilities=self.args.split_probabilities,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1617 random_state=self.args.random_seed,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1618 label_column=LABEL_COLUMN_NAME,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1619 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1620 split_config = {
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1621 "type": "fixed",
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1622 "column": SPLIT_COLUMN_NAME,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1623 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1624 split_info = (
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1625 f"No split column in CSV. Created stratified random split: "
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1626 f"{[int(p * 100) for p in self.args.split_probabilities]}% "
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1627 f"for train/val/test with balanced label distribution."
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1628 )
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1629
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1630 final_csv = self.temp_dir / TEMP_CSV_FILENAME
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1631
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1632 try:
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1633
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1634 df.to_csv(final_csv, index=False)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1635 logger.info(f"Saved prepared data to {final_csv}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1636 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1637 logger.error("Error saving prepared CSV", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1638 raise
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1639
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1640 return final_csv, split_config, split_info
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1641
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1642 # Removed duplicate method
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1643
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1644 def _detect_image_dimensions(self) -> Tuple[int, int]:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1645 """Detect image dimensions from the first image in the dataset."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1646 try:
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1647 import zipfile
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1648 from PIL import Image
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1649 import io
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1650
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1651 # Check if image_zip is provided
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1652 if not self.args.image_zip:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1653 logger.warning("No image zip provided, using default 224x224")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1654 return 224, 224
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1655
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1656 # Extract first image to detect dimensions
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1657 with zipfile.ZipFile(self.args.image_zip, 'r') as z:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1658 image_files = [f for f in z.namelist() if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1659 if not image_files:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1660 logger.warning("No image files found in zip, using default 224x224")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1661 return 224, 224
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1662
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1663 # Check first image
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1664 with z.open(image_files[0]) as f:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1665 img = Image.open(io.BytesIO(f.read()))
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1666 width, height = img.size
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1667 logger.info(f"Detected image dimensions: {width}x{height}")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1668 return height, width # Return as (height, width) to match encoder config
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1669
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1670 except Exception as e:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1671 logger.warning(f"Error detecting image dimensions: {e}, using default 224x224")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1672 return 224, 224
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1673
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1674 def _cleanup_temp_dirs(self) -> None:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1675 if self.temp_dir and self.temp_dir.exists():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1676 logger.info(f"Cleaning up temp directory: {self.temp_dir}")
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1677 # Don't clean up for debugging
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1678 shutil.rmtree(self.temp_dir, ignore_errors=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1679 self.temp_dir = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1680 self.image_extract_dir = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1681
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1682 def run(self) -> None:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1683 """Execute the full workflow end-to-end."""
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1684 logger.info("Starting workflow...")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1685 self.args.output_dir.mkdir(parents=True, exist_ok=True)
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1686
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1687 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1688 self._create_temp_dirs()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1689 self._extract_images()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1690 csv_path, split_cfg, split_info = self._prepare_data()
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1691
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1692 use_pretrained = self.args.use_pretrained or self.args.fine_tune
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1693
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1694 backend_args = {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1695 "model_name": self.args.model_name,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1696 "fine_tune": self.args.fine_tune,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1697 "use_pretrained": use_pretrained,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1698 "epochs": self.args.epochs,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1699 "batch_size": self.args.batch_size,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1700 "preprocessing_num_processes": self.args.preprocessing_num_processes,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1701 "split_probabilities": self.args.split_probabilities,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1702 "learning_rate": self.args.learning_rate,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1703 "random_seed": self.args.random_seed,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1704 "early_stop": self.args.early_stop,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1705 "label_column_data_path": csv_path,
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1706 "augmentation": self.args.augmentation,
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1707 "image_resize": self.args.image_resize,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1708 "image_zip": self.args.image_zip,
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1709 "threshold": self.args.threshold,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1710 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1711 yaml_str = self.backend.prepare_config(backend_args, split_cfg)
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1712
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1713 config_file = self.temp_dir / TEMP_CONFIG_FILENAME
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1714 config_file.write_text(yaml_str)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1715 logger.info(f"Wrote backend config: {config_file}")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1716
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1717 ran_ok = True
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1718 try:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1719 # Run Ludwig experiment with absolute paths to avoid working directory issues
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1720 self.backend.run_experiment(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1721 csv_path,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1722 config_file,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1723 self.args.output_dir,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1724 self.args.random_seed,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1725 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1726 except Exception:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1727 logger.error("Workflow execution failed", exc_info=True)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1728 ran_ok = False
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1729
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1730 if ran_ok:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1731 logger.info("Workflow completed successfully.")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1732 # Generate a very small set of plots to conserve disk space
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1733 self.backend.generate_plots(self.args.output_dir)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1734 # Build HTML report (robust to missing metrics)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1735 report_file = self.backend.generate_html_report(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1736 "Image Classification Results",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1737 self.args.output_dir,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1738 backend_args,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1739 split_info,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1740 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1741 logger.info(f"HTML report generated at: {report_file}")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1742 # Convert predictions parquet → csv
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1743 self.backend.convert_parquet_to_csv(self.args.output_dir)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1744 logger.info("Converted Parquet to CSV.")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1745 # Post-process cleanup to reduce disk footprint for subsequent tests
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1746 try:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1747 self._postprocess_cleanup(self.args.output_dir)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1748 except Exception as cleanup_err:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1749 logger.warning(f"Cleanup step failed: {cleanup_err}")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1750 else:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1751 # Fallback: create minimal outputs so downstream steps can proceed
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1752 logger.warning("Falling back to minimal outputs due to runtime failure.")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1753 try:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1754 self._create_minimal_outputs(self.args.output_dir, csv_path)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1755 # Even in fallback, produce an HTML shell so tests find required text
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1756 report_file = self.backend.generate_html_report(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1757 "Image Classification Results",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1758 self.args.output_dir,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1759 backend_args,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1760 split_info,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1761 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1762 logger.info(f"HTML report (fallback) generated at: {report_file}")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1763 except Exception as fb_err:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1764 logger.error(f"Failed to build fallback outputs: {fb_err}")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1765 raise
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1766
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1767 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1768 logger.error("Workflow execution failed", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1769 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1770 finally:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1771 self._cleanup_temp_dirs()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1772
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1773 def _postprocess_cleanup(self, output_dir: Path) -> None:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1774 """Remove large intermediates and caches to conserve disk space across tests."""
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1775 output_dir = Path(output_dir)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1776 exp_dirs = sorted(
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1777 output_dir.glob("experiment_run*"),
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1778 key=lambda p: p.stat().st_mtime,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1779 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1780 if exp_dirs:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1781 exp_dir = exp_dirs[-1]
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1782 # Remove training checkpoints directory if present
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1783 ckpt_dir = exp_dir / "model" / "training_checkpoints"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1784 if ckpt_dir.exists():
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1785 shutil.rmtree(ckpt_dir, ignore_errors=True)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1786 # Remove predictions parquet once CSV is generated
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1787 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1788 if parquet_path.exists():
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1789 try:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1790 parquet_path.unlink()
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1791 except Exception:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1792 pass
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1793
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1794 # Clear torch hub cache under the job-scoped home, if present
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1795 job_home_torch_hub = Path.cwd() / "home" / ".cache" / "torch" / "hub"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1796 if job_home_torch_hub.exists():
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1797 shutil.rmtree(job_home_torch_hub, ignore_errors=True)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1798
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1799 # Also try the default user cache as a best-effort (may not exist in job sandbox)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1800 user_home_torch_hub = Path.home() / ".cache" / "torch" / "hub"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1801 if user_home_torch_hub.exists():
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1802 shutil.rmtree(user_home_torch_hub, ignore_errors=True)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1803
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1804 # Clear huggingface cache if present in the job sandbox
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1805 job_home_hf = Path.cwd() / "home" / ".cache" / "huggingface"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1806 if job_home_hf.exists():
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1807 shutil.rmtree(job_home_hf, ignore_errors=True)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1808
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1809 def _create_minimal_outputs(self, output_dir: Path, prepared_csv_path: Path) -> None:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1810 """Create a minimal set of outputs so Galaxy can collect expected artifacts.
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1811
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1812 - experiment_run/
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1813 - predictions.csv (1 column)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1814 - visualizations/train/ (empty)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1815 - visualizations/test/ (empty)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1816 - model/
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1817 - model_weights/ (empty)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1818 - model_hyperparameters.json (stub)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1819 """
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1820 output_dir = Path(output_dir)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1821 exp_dir = output_dir / "experiment_run"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1822 (exp_dir / "visualizations" / "train").mkdir(parents=True, exist_ok=True)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1823 (exp_dir / "visualizations" / "test").mkdir(parents=True, exist_ok=True)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1824 model_dir = exp_dir / "model"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1825 (model_dir / "model_weights").mkdir(parents=True, exist_ok=True)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1826
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1827 # Stub JSON so the tool's copy step succeeds
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1828 try:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1829 (model_dir / "model_hyperparameters.json").write_text("{}\n")
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1830 except Exception:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1831 pass
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1832
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1833 # Create a small predictions.csv with exactly 1 column
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1834 try:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1835 df_all = pd.read_csv(prepared_csv_path)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1836 from constants import SPLIT_COLUMN_NAME # local import to avoid cycle at top
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1837 num_rows = int((df_all[SPLIT_COLUMN_NAME] == 2).sum()) if SPLIT_COLUMN_NAME in df_all.columns else 1
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1838 except Exception:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1839 num_rows = 1
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1840 num_rows = max(1, num_rows)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1841 pd.DataFrame({"prediction": [0] * num_rows}).to_csv(exp_dir / "predictions.csv", index=False)
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1842
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1843
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1844 def parse_learning_rate(s):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1845 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1846 return float(s)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1847 except (TypeError, ValueError):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1848 return None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1849
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1850
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1851 def aug_parse(aug_string: str):
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1852 """
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1853 Parse comma-separated augmentation keys into Ludwig augmentation dicts.
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1854 Raises ValueError on unknown key.
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1855 """
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1856 mapping = {
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1857 "random_horizontal_flip": {"type": "random_horizontal_flip"},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1858 "random_vertical_flip": {"type": "random_vertical_flip"},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1859 "random_rotate": {"type": "random_rotate", "degree": 10},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1860 "random_blur": {"type": "random_blur", "kernel_size": 3},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1861 "random_brightness": {"type": "random_brightness", "min": 0.5, "max": 2.0},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1862 "random_contrast": {"type": "random_contrast", "min": 0.5, "max": 2.0},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1863 }
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1864 aug_list = []
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1865 for tok in aug_string.split(","):
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1866 key = tok.strip()
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1867 if not key:
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1868 continue
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1869 if key not in mapping:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1870 valid = ", ".join(mapping.keys())
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1871 raise ValueError(f"Unknown augmentation '{key}'. Valid choices: {valid}")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1872 aug_list.append(mapping[key])
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1873 return aug_list
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1874
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1875
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1876 class SplitProbAction(argparse.Action):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1877 def __call__(self, parser, namespace, values, option_string=None):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1878 train, val, test = values
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1879 total = train + val + test
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1880 if abs(total - 1.0) > 1e-6:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1881 parser.error(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1882 f"--split-probabilities must sum to 1.0; "
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1883 f"got {train:.3f} + {val:.3f} + {test:.3f} = {total:.3f}"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1884 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1885 setattr(namespace, self.dest, values)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1886
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1887
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1888 def main():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1889 parser = argparse.ArgumentParser(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1890 description="Image Classification Learner with Pluggable Backends",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1891 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1892 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1893 "--csv-file",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1894 required=True,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1895 type=Path,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1896 help="Path to the input CSV",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1897 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1898 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1899 "--image-zip",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1900 required=True,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1901 type=Path,
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1902 help="Path to the images ZIP or a directory containing images",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1903 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1904 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1905 "--model-name",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1906 required=True,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1907 choices=MODEL_ENCODER_TEMPLATES.keys(),
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1908 help="Which model template to use",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1909 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1910 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1911 "--use-pretrained",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1912 action="store_true",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1913 help="Use pretrained weights for the model",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1914 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1915 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1916 "--fine-tune",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1917 action="store_true",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1918 help="Enable fine-tuning",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1919 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1920 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1921 "--epochs",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1922 type=int,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1923 default=10,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1924 help="Number of training epochs",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1925 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1926 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1927 "--early-stop",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1928 type=int,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1929 default=5,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1930 help="Early stopping patience",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1931 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1932 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1933 "--batch-size",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1934 type=int,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1935 help="Batch size (None = auto)",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1936 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1937 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1938 "--output-dir",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1939 type=Path,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1940 default=Path("learner_output"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1941 help="Where to write outputs",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1942 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1943 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1944 "--validation-size",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1945 type=float,
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1946 default=0.15,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1947 help="Fraction for validation (0.0–1.0)",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1948 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1949 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1950 "--preprocessing-num-processes",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1951 type=int,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1952 default=max(1, os.cpu_count() // 2),
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1953 help="CPU processes for data prep",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1954 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1955 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1956 "--split-probabilities",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1957 type=float,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1958 nargs=3,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1959 metavar=("train", "val", "test"),
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1960 action=SplitProbAction,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1961 default=[0.7, 0.1, 0.2],
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1962 help=(
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1963 "Random split proportions (e.g., 0.7 0.1 0.2).Only used if no split column."
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1964 ),
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1965 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1966 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1967 "--random-seed",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1968 type=int,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1969 default=42,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1970 help="Random seed used for dataset splitting (default: 42)",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1971 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1972 parser.add_argument(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1973 "--learning-rate",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1974 type=parse_learning_rate,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1975 default=None,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1976 help="Learning rate. If not provided, Ludwig will auto-select it.",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1977 )
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1978 parser.add_argument(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1979 "--augmentation",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1980 type=str,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1981 default=None,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1982 help=(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1983 "Comma-separated list (in order) of any of: "
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1984 "random_horizontal_flip, random_vertical_flip, random_rotate, "
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1985 "random_blur, random_brightness, random_contrast. "
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1986 "E.g. --augmentation random_horizontal_flip,random_rotate"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1987 ),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1988 )
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1989 parser.add_argument(
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1990 "--image-resize",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1991 type=str,
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1992 choices=[
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1993 "original", "96x96", "128x128", "160x160", "192x192", "220x220",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1994 "224x224", "256x256", "299x299", "320x320", "384x384", "448x448", "512x512"
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1995 ],
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1996 default="original",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1997 help="Image resize option. 'original' keeps images as-is, other options resize to specified dimensions.",
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1998 )
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
1999 parser.add_argument(
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
2000 "--threshold",
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
2001 type=float,
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
2002 default=None,
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
2003 help=(
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
2004 "Decision threshold for binary classification (0.0–1.0)."
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
2005 "Overrides default 0.5."
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
2006 ),
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
2007 )
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
2008
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2009 args = parser.parse_args()
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
2010
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2011 if not 0.0 <= args.validation_size <= 1.0:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2012 parser.error("validation-size must be between 0.0 and 1.0")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2013 if not args.csv_file.is_file():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2014 parser.error(f"CSV not found: {args.csv_file}")
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
2015 if not (args.image_zip.is_file() or args.image_zip.is_dir()):
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
2016 parser.error(f"ZIP or directory not found: {args.image_zip}")
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
2017 if args.augmentation is not None:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
2018 try:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
2019 augmentation_setup = aug_parse(args.augmentation)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
2020 setattr(args, "augmentation", augmentation_setup)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
2021 except ValueError as e:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
2022 parser.error(str(e))
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
2023
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2024 backend_instance = LudwigDirectBackend()
11
c5150cceab47 planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents: 10
diff changeset
2025 orchestrator = ImageLearnerCLI(args, backend_instance)
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
2026
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2027 exit_code = 0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2028 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2029 orchestrator.run()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2030 logger.info("Main script finished successfully.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2031 except Exception as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2032 logger.error(f"Main script failed.{e}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2033 exit_code = 1
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2034 finally:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2035 sys.exit(exit_code)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2036
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2037
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
2038 if __name__ == "__main__":
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2039 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2040 import ludwig
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
2041
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2042 logger.debug(f"Found Ludwig version: {ludwig.globals.LUDWIG_VERSION}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2043 except ImportError:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
2044 logger.error(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
2045 "Ludwig library not found. Please ensure Ludwig is installed "
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
2046 "('pip install ludwig[image]')"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
2047 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2048 sys.exit(1)
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
2049
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2050 main()