Mercurial > repos > goeckslab > image_learner
annotate image_learner_cli.py @ 11:c5150cceab47 draft default tip
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
author | goeckslab |
---|---|
date | Sat, 18 Oct 2025 03:17:09 +0000 |
parents | b0d893d04d4c |
children |
rev | line source |
---|---|
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1 import argparse |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
2 import json |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
3 import logging |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
4 import os |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
5 import shutil |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
6 import sys |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
7 import tempfile |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
8 import zipfile |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
9 from pathlib import Path |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
10 from typing import Any, Dict, Optional, Protocol, Tuple |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
11 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
12 import matplotlib |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
13 import numpy as np |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
14 import pandas as pd |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
15 import pandas.api.types as ptypes |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
16 import yaml |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
17 from constants import ( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
18 IMAGE_PATH_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
19 LABEL_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
20 METRIC_DISPLAY_NAMES, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
21 MODEL_ENCODER_TEMPLATES, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
22 SPLIT_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
23 TEMP_CONFIG_FILENAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
24 TEMP_CSV_FILENAME, |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
25 TEMP_DIR_PREFIX, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
26 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
27 from ludwig.globals import ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
28 DESCRIPTION_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
29 PREDICTIONS_PARQUET_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
30 TEST_STATISTICS_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
31 TRAIN_SET_METADATA_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
32 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
33 from ludwig.utils.data_utils import get_split_path |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
34 from plotly_plots import build_classification_plots |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
35 from sklearn.model_selection import train_test_split |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
36 from utils import ( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
37 build_tabbed_html, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
38 encode_image_to_base64, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
39 get_html_closing, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
40 get_html_template, |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
41 get_metrics_help_modal, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
42 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
43 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
44 # Set matplotlib backend after imports |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
45 matplotlib.use('Agg') |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
46 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
47 # --- Logging Setup --- |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
48 logging.basicConfig( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
49 level=logging.INFO, |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
50 format="%(asctime)s %(levelname)s %(name)s: %(message)s", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
51 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
52 logger = logging.getLogger("ImageLearner") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
53 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
54 # Optional MetaFormer configuration registry |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
55 META_DEFAULT_CFGS: Dict[str, Any] = {} |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
56 try: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
57 from MetaFormer import default_cfgs as META_DEFAULT_CFGS # type: ignore[attr-defined] |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
58 except Exception as e: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
59 logger.debug("MetaFormer default configs unavailable: %s", e) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
60 META_DEFAULT_CFGS = {} |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
61 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
62 # Try to import Ludwig visualization registry (may fail due to optional dependencies) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
63 # This must come AFTER logger is defined |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
64 _ludwig_viz_available = False |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
65 get_visualizations_registry = None |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
66 try: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
67 from ludwig.visualize import get_visualizations_registry |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
68 _ludwig_viz_available = True |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
69 logger.info("Ludwig visualizations available") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
70 except ImportError as e: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
71 logger.warning(f"Ludwig visualizations not available: {e}. Will use fallback plots only.") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
72 except Exception as e: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
73 logger.warning(f"Ludwig visualizations not available due to dependency issues: {e}. Will use fallback plots only.") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
74 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
75 # --- MetaFormer patching integration --- |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
76 _metaformer_patch_ok = False |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
77 try: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
78 from MetaFormer.metaformer_stacked_cnn import patch_ludwig_stacked_cnn as _mf_patch |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
79 if _mf_patch(): |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
80 _metaformer_patch_ok = True |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
81 logger.info("MetaFormer patching applied for Ludwig stacked_cnn encoder.") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
82 except Exception as e: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
83 logger.warning(f"MetaFormer stacked CNN not available: {e}") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
84 _metaformer_patch_ok = False |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
85 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
86 # Note: CAFormer models are now handled through MetaFormer framework |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
87 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
88 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
89 def format_config_table_html( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
90 config: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
91 split_info: Optional[str] = None, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
92 training_progress: dict = None, |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
93 output_type: Optional[str] = None, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
94 ) -> str: |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
95 display_keys = [ |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
96 "task_type", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
97 "model_name", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
98 "epochs", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
99 "batch_size", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
100 "fine_tune", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
101 "use_pretrained", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
102 "learning_rate", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
103 "random_seed", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
104 "early_stop", |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
105 "threshold", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
106 ] |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
107 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
108 rows = [] |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
109 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
110 for key in display_keys: |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
111 val = config.get(key, None) |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
112 if key == "threshold": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
113 if output_type != "binary": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
114 continue |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
115 val = val if val is not None else 0.5 |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
116 val_str = f"{val:.2f}" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
117 if val == 0.5: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
118 val_str += " (default)" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
119 else: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
120 if key == "task_type": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
121 val_str = val.title() if isinstance(val, str) else "N/A" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
122 elif key == "batch_size": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
123 if val is not None: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
124 val_str = int(val) |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
125 else: |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
126 val = "auto" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
127 val_str = "auto" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
128 resolved_val = None |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
129 if val is None or val == "auto": |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
130 if training_progress: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
131 resolved_val = training_progress.get("batch_size") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
132 val = ( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
133 "Auto-selected batch size by Ludwig:<br>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
134 f"<span style='font-size: 0.85em;'>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
135 f"{resolved_val if resolved_val else val}</span><br>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
136 "<span style='font-size: 0.85em;'>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
137 "Based on model architecture and training setup " |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
138 "(e.g., fine-tuning).<br>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
139 "See <a href='https://ludwig.ai/latest/configuration/trainer/" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
140 "#trainer-parameters' target='_blank'>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
141 "Ludwig Trainer Parameters</a> for details." |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
142 "</span>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
143 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
144 else: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
145 val = ( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
146 "Auto-selected by Ludwig<br>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
147 "<span style='font-size: 0.85em;'>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
148 "Automatically tuned based on architecture and dataset.<br>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
149 "See <a href='https://ludwig.ai/latest/configuration/trainer/" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
150 "#trainer-parameters' target='_blank'>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
151 "Ludwig Trainer Parameters</a> for details." |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
152 "</span>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
153 ) |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
154 elif key == "learning_rate": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
155 if val is not None and val != "auto": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
156 val_str = f"{val:.6f}" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
157 else: |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
158 if training_progress: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
159 resolved_val = training_progress.get("learning_rate") |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
160 val_str = ( |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
161 "Auto-selected learning rate by Ludwig:<br>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
162 f"<span style='font-size: 0.85em;'>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
163 f"{resolved_val if resolved_val else 'auto'}</span><br>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
164 "<span style='font-size: 0.85em;'>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
165 "Based on model architecture and training setup " |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
166 "(e.g., fine-tuning).<br>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
167 "</span>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
168 ) |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
169 else: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
170 val_str = ( |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
171 "Auto-selected by Ludwig<br>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
172 "<span style='font-size: 0.85em;'>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
173 "Automatically tuned based on architecture and dataset.<br>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
174 "See <a href='https://ludwig.ai/latest/configuration/trainer/" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
175 "#trainer-parameters' target='_blank'>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
176 "Ludwig Trainer Parameters</a> for details." |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
177 "</span>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
178 ) |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
179 elif key == "epochs": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
180 if val is None: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
181 val_str = "N/A" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
182 else: |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
183 if ( |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
184 training_progress |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
185 and "epoch" in training_progress |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
186 and val > training_progress["epoch"] |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
187 ): |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
188 val_str = ( |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
189 f"Because of early stopping: the training " |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
190 f"stopped at epoch {training_progress['epoch']}" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
191 ) |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
192 else: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
193 val_str = val |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
194 else: |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
195 val_str = val if val is not None else "N/A" |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
196 if val_str == "N/A" and key not in ["task_type"]: |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
197 continue |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
198 rows.append( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
199 f"<tr>" |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
200 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; " |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
201 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
202 f"{key.replace('_', ' ').title()}</td>" |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
203 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; " |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
204 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>" |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
205 f"{val_str}</td>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
206 f"</tr>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
207 ) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
208 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
209 aug_cfg = config.get("augmentation") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
210 if aug_cfg: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
211 types = [str(a.get("type", "")) for a in aug_cfg] |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
212 aug_val = ", ".join(types) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
213 rows.append( |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
214 f"<tr><td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; " |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
215 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>Augmentation</td>" |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
216 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; " |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
217 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>{aug_val}</td></tr>" |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
218 ) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
219 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
220 if split_info: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
221 rows.append( |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
222 f"<tr><td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; " |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
223 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>Data Split</td>" |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
224 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; " |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
225 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>{split_info}</td></tr>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
226 ) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
227 |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
228 html = f""" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
229 <h2 style="text-align: center;">Model and Training Summary</h2> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
230 <div style="display: flex; justify-content: center;"> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
231 <table style="border-collapse: collapse; width: 100%; table-layout: fixed;"> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
232 <thead><tr> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
233 <th style="padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Parameter</th> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
234 <th style="padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Value</th> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
235 </tr></thead> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
236 <tbody> |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
237 {"".join(rows)} |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
238 </tbody> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
239 </table> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
240 </div><br> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
241 <p style="text-align: center; font-size: 0.9em;"> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
242 Model trained using <a href="https://ludwig.ai/" target="_blank" rel="noopener noreferrer">Ludwig</a>. |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
243 <a href="https://ludwig.ai/latest/configuration/" target="_blank" rel="noopener noreferrer"> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
244 Ludwig documentation provides detailed information about default model and training parameters |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
245 </a> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
246 </p><hr> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
247 """ |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
248 return html |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
249 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
250 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
def detect_output_type(test_stats):
    """Infer the task type from the ``"label"`` section of the test statistics.

    Args:
        test_stats: Mapping of test statistics. Only the ``"label"`` entry is
            inspected. ``None``, a missing ``"label"`` entry, or a null
            ``"label"`` entry are all treated as "no label statistics".

    Returns:
        ``"regression"`` if the label statistics contain a
        ``"mean_squared_error"`` key, ``"binary"`` if ``"per_class_stats"``
        holds exactly two entries, and ``"category"`` otherwise.
    """
    # `dict.get(key, {})` only covers a *missing* key; a key that is present
    # with a null value would still yield None, so normalise with `or {}`.
    label_stats = (test_stats or {}).get("label") or {}
    if "mean_squared_error" in label_stats:
        return "regression"
    per_class = label_stats.get("per_class_stats") or {}
    if len(per_class) == 2:
        return "binary"
    return "category"
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
260 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
261 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
262 def extract_metrics_from_json( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
263 train_stats: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
264 test_stats: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
265 output_type: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
266 ) -> dict: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
267 """Extracts relevant metrics from training and test statistics based on the output type.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
268 metrics = {"training": {}, "validation": {}, "test": {}} |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
269 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
270 def get_last_value(stats, key): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
271 val = stats.get(key) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
272 if isinstance(val, list) and val: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
273 return val[-1] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
274 elif isinstance(val, (int, float)): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
275 return val |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
276 return None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
277 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
278 for split in ["training", "validation"]: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
279 split_stats = train_stats.get(split, {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
280 if not split_stats: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
281 logging.warning(f"No statistics found for {split} split") |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
282 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
283 label_stats = split_stats.get("label", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
284 if not label_stats: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
285 logging.warning(f"No label statistics found for {split} split") |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
286 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
287 if output_type == "binary": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
288 metrics[split] = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
289 "accuracy": get_last_value(label_stats, "accuracy"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
290 "loss": get_last_value(label_stats, "loss"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
291 "precision": get_last_value(label_stats, "precision"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
292 "recall": get_last_value(label_stats, "recall"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
293 "specificity": get_last_value(label_stats, "specificity"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
294 "roc_auc": get_last_value(label_stats, "roc_auc"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
295 } |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
296 elif output_type == "regression": |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
297 metrics[split] = { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
298 "loss": get_last_value(label_stats, "loss"), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
299 "mean_absolute_error": get_last_value( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
300 label_stats, "mean_absolute_error" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
301 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
302 "mean_absolute_percentage_error": get_last_value( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
303 label_stats, "mean_absolute_percentage_error" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
304 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
305 "mean_squared_error": get_last_value(label_stats, "mean_squared_error"), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
306 "root_mean_squared_error": get_last_value( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
307 label_stats, "root_mean_squared_error" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
308 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
309 "root_mean_squared_percentage_error": get_last_value( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
310 label_stats, "root_mean_squared_percentage_error" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
311 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
312 "r2": get_last_value(label_stats, "r2"), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
313 } |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
314 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
315 metrics[split] = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
316 "accuracy": get_last_value(label_stats, "accuracy"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
317 "accuracy_micro": get_last_value(label_stats, "accuracy_micro"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
318 "loss": get_last_value(label_stats, "loss"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
319 "roc_auc": get_last_value(label_stats, "roc_auc"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
320 "hits_at_k": get_last_value(label_stats, "hits_at_k"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
321 } |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
322 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
323 # Test metrics: dynamic extraction according to exclusions |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
324 test_label_stats = test_stats.get("label", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
325 if not test_label_stats: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
326 logging.warning("No label statistics found for test split") |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
327 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
328 combined_stats = test_stats.get("combined", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
329 overall_stats = test_label_stats.get("overall_stats", {}) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
330 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
331 # Define exclusions |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
332 if output_type == "binary": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
333 exclude = {"per_class_stats", "precision_recall_curve", "roc_curve"} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
334 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
335 exclude = {"per_class_stats", "confusion_matrix"} |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
336 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
337 # 1. Get all scalar test_label_stats not excluded |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
338 test_metrics = {} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
339 for k, v in test_label_stats.items(): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
340 if k in exclude: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
341 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
342 if k == "overall_stats": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
343 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
344 if isinstance(v, (int, float, str, bool)): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
345 test_metrics[k] = v |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
346 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
347 # 2. Add overall_stats (flattened) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
348 for k, v in overall_stats.items(): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
349 test_metrics[k] = v |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
350 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
351 # 3. Optionally include combined/loss if present and not already |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
352 if "loss" in combined_stats and "loss" not in test_metrics: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
353 test_metrics["loss"] = combined_stats["loss"] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
354 metrics["test"] = test_metrics |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
355 return metrics |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
356 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
357 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
def generate_table_row(cells, styles):
    """Build a single HTML ``<tr>`` element from a sequence of cell values.

    Each item of ``cells`` is interpolated into its own ``<td>`` element whose
    inline ``style`` attribute is set to ``styles``. Values are inserted as-is;
    no HTML escaping is applied here.
    """
    rendered_cells = [f"<td style='{styles}'>{cell}</td>" for cell in cells]
    return "<tr>" + "".join(rendered_cells) + "</tr>"
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
365 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
366 |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
367 # ----------------------------------------- |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
368 # 2) MODEL PERFORMANCE (Train/Val/Test) TABLE |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
369 # ----------------------------------------- |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
def format_stats_table_html(train_stats: dict, test_stats: dict, output_type: str) -> str:
    """Render train, validation and test metrics as one combined HTML table.

    The metrics are obtained from ``extract_metrics_from_json``. A metric is
    given a row only when its key exists in all three splits and none of the
    three values is ``None``; values are printed with four decimal places.
    The row label is looked up in ``METRIC_DISPLAY_NAMES`` and falls back to
    the key with underscores replaced by spaces, title-cased. When no metric
    qualifies, a one-cell placeholder table is returned instead.
    """
    all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type)
    cell_style = (
        "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;"
    )

    rows = []
    for metric_key in sorted(all_metrics["training"].keys()):
        # Only metrics reported for every split can be compared side by side.
        if metric_key not in all_metrics["validation"]:
            continue
        if metric_key not in all_metrics["test"]:
            continue

        display_name = METRIC_DISPLAY_NAMES.get(
            metric_key,
            metric_key.replace("_", " ").title(),
        )
        split_values = [
            all_metrics[split].get(metric_key)
            for split in ("training", "validation", "test")
        ]
        if any(value is None for value in split_values):
            continue
        rows.append([display_name] + [f"{value:.4f}" for value in split_values])

    if not rows:
        return "<table><tr><td>No metric values found.</td></tr></table>"

    table_open = (
        "<h2 style='text-align: center;'>Model Performance Summary</h2>"
        "<div style='display: flex; justify-content: center;'>"
        "<table class='performance-summary' style='border-collapse: collapse;'>"
        "<thead><tr>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Train</th>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Validation</th>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Test</th>"
        "</tr></thead><tbody>"
    )
    table_body = "".join(generate_table_row(row, cell_style) for row in rows)
    return table_open + table_body + "</tbody></table></div><br>"
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
410 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
411 |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
412 # ------------------------------------------- |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
413 # 3) TRAIN/VALIDATION PERFORMANCE SUMMARY TABLE |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
414 # ------------------------------------------- |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
def format_train_val_stats_table_html(train_stats: dict, test_stats: dict) -> str:
    """Render the training and validation metrics as an HTML table.

    The output type is derived from ``test_stats`` via ``detect_output_type``
    and passed to ``extract_metrics_from_json``. A metric is given a row only
    when its key exists in both the training and validation splits and neither
    value is ``None``; values are printed with four decimal places. The row
    label is looked up in ``METRIC_DISPLAY_NAMES`` and falls back to the key
    with underscores replaced by spaces, title-cased. When no metric
    qualifies, a one-cell placeholder table is returned instead.
    """
    output_type = detect_output_type(test_stats)
    all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type)
    cell_style = (
        "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;"
    )

    rows = []
    for metric_key in sorted(all_metrics["training"].keys()):
        # A metric without a validation counterpart has nothing to compare to.
        if metric_key not in all_metrics["validation"]:
            continue

        display_name = METRIC_DISPLAY_NAMES.get(
            metric_key,
            metric_key.replace("_", " ").title(),
        )
        train_value = all_metrics["training"].get(metric_key)
        val_value = all_metrics["validation"].get(metric_key)
        if train_value is None or val_value is None:
            continue
        rows.append([display_name, f"{train_value:.4f}", f"{val_value:.4f}"])

    if not rows:
        return "<table><tr><td>No metric values found for Train/Validation.</td></tr></table>"

    table_open = (
        "<h2 style='text-align: center;'>Train/Validation Performance Summary</h2>"
        "<div style='display: flex; justify-content: center;'>"
        "<table class='performance-summary' style='border-collapse: collapse;'>"
        "<thead><tr>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Train</th>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Validation</th>"
        "</tr></thead><tbody>"
    )
    table_body = "".join(generate_table_row(row, cell_style) for row in rows)
    return table_open + table_body + "</tbody></table></div><br>"
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
450 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
451 |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
452 # ----------------------------------------- |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
453 # 4) TEST-ONLY PERFORMANCE SUMMARY TABLE |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
454 # ----------------------------------------- |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
455 def format_test_merged_stats_table_html( |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
456 test_metrics: Dict[str, Any], output_type: str |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
457 ) -> str: |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
458 """Format test metrics into an HTML table.""" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
459 rows = [] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
460 for key in sorted(test_metrics.keys()): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
461 display_name = METRIC_DISPLAY_NAMES.get(key, key.replace("_", " ").title()) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
462 value = test_metrics[key] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
463 if value is not None: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
464 rows.append([display_name, f"{value:.4f}"]) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
465 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
466 if not rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
467 return "<table><tr><td>No test metric values found.</td></tr></table>" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
468 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
469 html = ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
470 "<h2 style='text-align: center;'>Test Performance Summary</h2>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
471 "<div style='display: flex; justify-content: center;'>" |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
472 "<table class='performance-summary' style='border-collapse: collapse;'>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
473 "<thead><tr>" |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
474 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
475 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Test</th>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
476 "</tr></thead><tbody>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
477 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
478 for row in rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
479 html += generate_table_row( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
480 row, |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
481 "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
482 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
483 html += "</tbody></table></div><br>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
484 return html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
485 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
486 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
def split_data_0_2(
    df: pd.DataFrame,
    split_column: str,
    validation_size: float = 0.1,
    random_state: int = 42,
    label_column: Optional[str] = None,
) -> pd.DataFrame:
    """Carve a validation subset out of the training rows of a {0, 2} split.

    Rows whose ``split_column`` value is 0 are treated as training rows; a
    ``validation_size`` fraction of them is re-labelled 1 (validation). Rows
    with any other split value are left untouched. The input frame is never
    modified; a copy is returned.

    Args:
        df: Input frame containing ``split_column``.
        split_column: Name of the column holding the split codes.
        validation_size: Fraction of the split-0 rows to move to split 1.
            ``<= 0`` keeps every row as train; ``>= 1`` moves every split-0
            row to validation.
        random_state: Seed forwarded to ``train_test_split``.
        label_column: Optional column used to stratify the validation draw.
            Ignored when missing from ``df`` or when the training rows hold
            a single class.

    Returns:
        A copy of ``df`` whose ``split_column`` is integer typed.

    Raises:
        ValueError: If ``split_column`` holds missing/non-numeric values, or
            if the random split itself is rejected by ``train_test_split``.
    """
    out = df.copy()

    # errors="coerce" turns unparseable entries into NaN; report them
    # explicitly instead of letting astype(int) fail with an opaque
    # integer-casting error.
    split_values = pd.to_numeric(out[split_column], errors="coerce")
    if split_values.isna().any():
        raise ValueError(
            f"Column '{split_column}' contains missing or non-numeric values; "
            "cannot interpret it as a split assignment."
        )
    out[split_column] = split_values.astype(int)

    idx_train = out.index[out[split_column] == 0].tolist()
    if not idx_train:
        logger.info("No rows with split=0; nothing to do.")
        return out

    # Handle the degenerate sizes before inspecting labels so that no
    # "using stratified split" message is logged when no split happens.
    if validation_size <= 0:
        logger.info("validation_size <= 0; keeping all as train.")
        return out
    if validation_size >= 1:
        logger.info("validation_size >= 1; moving all train → validation.")
        out.loc[idx_train, split_column] = 1
        return out

    stratify_arr = None
    if label_column and label_column in out.columns:
        train_labels = out.loc[idx_train, label_column]
        label_counts = train_labels.value_counts()
        if label_counts.size > 1:
            # The previous min(validation_size, 1 / min_count) adjustment
            # could never change validation_size (the guard already implies
            # validation_size < 1 / min_count), so the requested size is
            # kept and the situation is only reported.
            smallest_class = int(label_counts.min())
            if smallest_class * validation_size < 1:
                logger.warning(
                    f"Smallest class has only {smallest_class} training sample(s); "
                    f"with validation_size={validation_size:.3f} it may be absent "
                    "from the validation set"
                )
            stratify_arr = train_labels
            logger.info("Using stratified split for validation set")
        else:
            logger.warning("Only one label class found; cannot stratify")

    val_idx = None
    if stratify_arr is not None:
        try:
            _, val_idx = train_test_split(
                idx_train,
                test_size=validation_size,
                random_state=random_state,
                stratify=stratify_arr,
            )
            logger.info("Successfully applied stratified split")
        except ValueError as e:
            logger.warning(f"Stratified split failed ({e}); falling back to random split.")

    if val_idx is None:
        # Either stratification was not possible or it was rejected above.
        # A ValueError raised here propagates to the caller, as before.
        _, val_idx = train_test_split(
            idx_train,
            test_size=validation_size,
            random_state=random_state,
            stratify=None,
        )
        logger.info("Applied random (non-stratified) split")

    # Rows not drawn for validation already carry split value 0.
    out.loc[val_idx, split_column] = 1
    out[split_column] = out[split_column].astype(int)
    return out
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
551 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
552 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
553 def create_stratified_random_split( |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
554 df: pd.DataFrame, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
555 split_column: str, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
556 split_probabilities: list = [0.7, 0.1, 0.2], |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
557 random_state: int = 42, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
558 label_column: Optional[str] = None, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
559 ) -> pd.DataFrame: |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
560 """Create a stratified random split when no split column exists.""" |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
561 out = df.copy() |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
562 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
563 # initialize split column |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
564 out[split_column] = 0 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
565 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
566 if not label_column or label_column not in out.columns: |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
567 logger.warning( |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
568 "No label column found; using random split without stratification" |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
569 ) |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
570 # fall back to simple random assignment |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
571 indices = out.index.tolist() |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
572 np.random.seed(random_state) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
573 np.random.shuffle(indices) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
574 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
575 n_total = len(indices) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
576 n_train = int(n_total * split_probabilities[0]) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
577 n_val = int(n_total * split_probabilities[1]) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
578 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
579 out.loc[indices[:n_train], split_column] = 0 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
580 out.loc[indices[n_train:n_train + n_val], split_column] = 1 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
581 out.loc[indices[n_train + n_val:], split_column] = 2 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
582 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
583 return out.astype({split_column: int}) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
584 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
585 # check if stratification is possible |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
586 label_counts = out[label_column].value_counts() |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
587 min_samples_per_class = label_counts.min() |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
588 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
589 # ensure we have enough samples for stratification: |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
590 # Each class must have at least as many samples as the number of splits, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
591 # so that each split can receive at least one sample per class. |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
592 min_samples_required = len(split_probabilities) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
593 if min_samples_per_class < min_samples_required: |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
594 logger.warning( |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
595 f"Insufficient samples per class for stratification (min: {min_samples_per_class}, required: {min_samples_required}); using random split" |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
596 ) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
597 # fall back to simple random assignment |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
598 indices = out.index.tolist() |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
599 np.random.seed(random_state) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
600 np.random.shuffle(indices) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
601 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
602 n_total = len(indices) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
603 n_train = int(n_total * split_probabilities[0]) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
604 n_val = int(n_total * split_probabilities[1]) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
605 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
606 out.loc[indices[:n_train], split_column] = 0 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
607 out.loc[indices[n_train:n_train + n_val], split_column] = 1 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
608 out.loc[indices[n_train + n_val:], split_column] = 2 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
609 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
610 return out.astype({split_column: int}) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
611 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
612 logger.info("Using stratified random split for train/validation/test sets") |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
613 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
614 # first split: separate test set |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
615 train_val_idx, test_idx = train_test_split( |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
616 out.index.tolist(), |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
617 test_size=split_probabilities[2], |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
618 random_state=random_state, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
619 stratify=out[label_column], |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
620 ) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
621 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
622 # second split: separate training and validation from remaining data |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
623 val_size_adjusted = split_probabilities[1] / ( |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
624 split_probabilities[0] + split_probabilities[1] |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
625 ) |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
626 train_idx, val_idx = train_test_split( |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
627 train_val_idx, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
628 test_size=val_size_adjusted, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
629 random_state=random_state, |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
630 stratify=out.loc[train_val_idx, label_column] if label_column and label_column in out.columns else None, |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
631 ) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
632 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
633 # assign split values |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
634 out.loc[train_idx, split_column] = 0 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
635 out.loc[val_idx, split_column] = 1 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
636 out.loc[test_idx, split_column] = 2 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
637 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
638 logger.info("Successfully applied stratified random split") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
639 logger.info( |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
640 f"Split counts: Train={len(train_idx)}, Val={len(val_idx)}, Test={len(test_idx)}" |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
641 ) |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
642 return out.astype({split_column: int}) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
643 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
644 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
class Backend(Protocol):
    """Structural interface that a machine learning backend must satisfy.

    Any class providing these four methods with compatible signatures is
    accepted wherever a ``Backend`` is expected; no inheritance is required.
    """

    def prepare_config(self, config_params: Dict[str, Any], split_config: Dict[str, Any]) -> str:
        """Return the backend configuration, as a string, for the given parameters.

        Args:
            config_params: Experiment/model parameters keyed by name.
            split_config: Dataset split settings keyed by name.
        """

    def run_experiment(
        self, dataset_path: Path, config_path: Path, output_dir: Path, random_seed: int
    ) -> None:
        """Run one experiment.

        Args:
            dataset_path: Location of the dataset to use.
            config_path: Location of the configuration to use.
            output_dir: Directory handed to the backend for its outputs
                (presumably where results are written; confirm per backend).
            random_seed: Seed passed to the backend.
        """

    def generate_plots(self, output_dir: Path) -> None:
        """Generate plots for ``output_dir`` (exact inputs/outputs are backend-defined)."""

    def generate_html_report(
        self, title: str, output_dir: str, config: Dict[str, Any], split_info: str
    ) -> Path:
        """Generate an HTML report and return a ``Path``.

        The returned path is presumably the report file; confirm against the
        concrete backend. Note that ``output_dir`` is typed ``str`` here,
        unlike the ``Path`` used by the other methods.
        """
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
675 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
676 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
677 class LudwigDirectBackend: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
678 """Backend for running Ludwig experiments directly via the internal experiment_cli function.""" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
679 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
def _detect_image_dimensions(self, image_zip_path: str) -> Tuple[int, int]:
    """Detect image dimensions from the first usable image in a zip archive.

    This is best-effort: any failure (Pillow not installed, unreadable
    archive, corrupt image, ...) is logged as a warning and the 224x224
    default is returned instead of raising.

    Args:
        image_zip_path: Path to the zip archive holding the images. A falsy
            value short-circuits to the default size.

    Returns:
        ``(height, width)`` of the first candidate image, or ``(224, 224)``
        when no dimensions could be detected.
    """
    default_dims = (224, 224)
    image_exts = (".png", ".jpg", ".jpeg")

    def _is_candidate(member: str) -> bool:
        # Archives created on macOS contain AppleDouble resource forks such
        # as "__MACOSX/._img.png": they carry an image extension but are not
        # images, so picking one would force the 224x224 fallback even
        # though real images are present.
        parts = member.split("/")
        return (
            member.lower().endswith(image_exts)
            and "__MACOSX" not in parts
            and not parts[-1].startswith("._")
        )

    try:
        # Imported lazily inside the try so that a missing Pillow install
        # degrades to the default size rather than breaking the backend.
        import io

        from PIL import Image

        if not image_zip_path:
            logger.warning("No image zip provided, using default 224x224")
            return default_dims

        with zipfile.ZipFile(image_zip_path, "r") as archive:
            first_image = next(
                (m for m in archive.namelist() if _is_candidate(m)), None
            )
            if first_image is None:
                logger.warning("No image files found in zip, using default 224x224")
                return default_dims

            # Buffer the member so Pillow gets a fully seekable stream.
            with archive.open(first_image) as member:
                payload = io.BytesIO(member.read())

        # Context manager releases the decoder/file handle once the size is read.
        with Image.open(payload) as img:
            width, height = img.size

        logger.info(f"Detected image dimensions: {width}x{height}")
        return height, width  # Return as (height, width) to match encoder config

    except Exception as e:
        # Deliberate catch-all: dimension detection must never abort the run.
        logger.warning(f"Error detecting image dimensions: {e}, using default 224x224")
        return default_dims
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
709 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
710 def prepare_config( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
711 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
712 config_params: Dict[str, Any], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
713 split_config: Dict[str, Any], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
714 ) -> str: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
715 logger.info("LudwigDirectBackend: Preparing YAML configuration.") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
716 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
717 model_name = config_params.get("model_name", "resnet18") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
718 use_pretrained = config_params.get("use_pretrained", False) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
719 fine_tune = config_params.get("fine_tune", False) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
720 if use_pretrained: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
721 trainable = bool(fine_tune) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
722 else: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
723 trainable = True |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
724 epochs = config_params.get("epochs", 10) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
725 batch_size = config_params.get("batch_size") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
726 num_processes = config_params.get("preprocessing_num_processes", 1) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
727 early_stop = config_params.get("early_stop", None) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
728 learning_rate = config_params.get("learning_rate") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
729 learning_rate = "auto" if learning_rate is None else float(learning_rate) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
730 raw_encoder = MODEL_ENCODER_TEMPLATES.get(model_name, model_name) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
731 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
732 # --- MetaFormer detection and config logic --- |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
733 def _is_metaformer(name: str) -> bool: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
734 return isinstance(name, str) and name.startswith( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
735 ( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
736 "identityformer_", |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
737 "randformer_", |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
738 "poolformerv2_", |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
739 "convformer_", |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
740 "caformer_", |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
741 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
742 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
743 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
744 # Check if this is a MetaFormer model (either direct name or in custom_model) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
745 is_metaformer = ( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
746 _is_metaformer(model_name) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
747 or (isinstance(raw_encoder, dict) and "custom_model" in raw_encoder and _is_metaformer(raw_encoder["custom_model"])) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
748 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
749 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
750 metaformer_resize: Optional[Tuple[int, int]] = None |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
751 metaformer_channels = 3 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
752 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
753 if is_metaformer: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
754 # Handle MetaFormer models |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
755 custom_model = None |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
756 if isinstance(raw_encoder, dict) and "custom_model" in raw_encoder: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
757 custom_model = raw_encoder["custom_model"] |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
758 else: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
759 custom_model = model_name |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
760 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
761 logger.info(f"DETECTED MetaFormer model: {custom_model}") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
762 cfg_channels, cfg_height, cfg_width = 3, 224, 224 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
763 if META_DEFAULT_CFGS: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
764 model_cfg = META_DEFAULT_CFGS.get(custom_model, {}) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
765 input_size = model_cfg.get("input_size") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
766 if isinstance(input_size, (list, tuple)) and len(input_size) == 3: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
767 cfg_channels, cfg_height, cfg_width = ( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
768 int(input_size[0]), |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
769 int(input_size[1]), |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
770 int(input_size[2]), |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
771 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
772 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
773 target_height, target_width = cfg_height, cfg_width |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
774 resize_value = config_params.get("image_resize") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
775 if resize_value and resize_value != "original": |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
776 try: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
777 dimensions = resize_value.split("x") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
778 if len(dimensions) == 2: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
779 target_height, target_width = int(dimensions[0]), int(dimensions[1]) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
780 if target_height <= 0 or target_width <= 0: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
781 raise ValueError( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
782 f"Image resize must be positive integers, received {resize_value}." |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
783 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
784 logger.info(f"MetaFormer explicit resize: {target_height}x{target_width}") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
785 else: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
786 raise ValueError(resize_value) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
787 except (ValueError, IndexError): |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
788 logger.warning( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
789 "Invalid image resize format '%s'; falling back to model default %sx%s", |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
790 resize_value, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
791 cfg_height, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
792 cfg_width, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
793 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
794 target_height, target_width = cfg_height, cfg_width |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
795 else: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
796 image_zip_path = config_params.get("image_zip", "") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
797 detected_height, detected_width = self._detect_image_dimensions(image_zip_path) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
798 if use_pretrained: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
799 if (detected_height, detected_width) != (cfg_height, cfg_width): |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
800 logger.info( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
801 "MetaFormer pretrained weights expect %sx%s; resizing from detected %sx%s", |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
802 cfg_height, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
803 cfg_width, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
804 detected_height, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
805 detected_width, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
806 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
807 else: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
808 target_height, target_width = detected_height, detected_width |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
809 if target_height <= 0 or target_width <= 0: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
810 raise ValueError( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
811 f"Invalid detected image dimensions for MetaFormer: {target_height}x{target_width}." |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
812 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
813 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
814 metaformer_channels = cfg_channels |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
815 metaformer_resize = (target_height, target_width) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
816 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
817 encoder_config = { |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
818 "type": "stacked_cnn", |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
819 "height": target_height, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
820 "width": target_width, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
821 "num_channels": metaformer_channels, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
822 "output_size": 128, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
823 "use_pretrained": use_pretrained, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
824 "trainable": trainable, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
825 "custom_model": custom_model, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
826 } |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
827 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
828 elif isinstance(raw_encoder, dict): |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
829 # Handle image resize for regular encoders |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
830 # Note: Standard encoders like ResNet don't support height/width parameters |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
831 # Resize will be handled at the preprocessing level by Ludwig |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
832 if config_params.get("image_resize") and config_params["image_resize"] != "original": |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
833 logger.info(f"Resize requested: {config_params['image_resize']} for standard encoder. Resize will be handled at preprocessing level.") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
834 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
835 encoder_config = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
836 **raw_encoder, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
837 "use_pretrained": use_pretrained, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
838 "trainable": trainable, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
839 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
840 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
841 encoder_config = {"type": raw_encoder} |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
842 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
843 batch_size_cfg = batch_size or "auto" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
844 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
845 label_column_path = config_params.get("label_column_data_path") |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
846 label_series = None |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
847 if label_column_path is not None and Path(label_column_path).exists(): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
848 try: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
849 label_series = pd.read_csv(label_column_path)[LABEL_COLUMN_NAME] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
850 except Exception as e: |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
851 logger.warning(f"Could not read label column for task detection: {e}") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
852 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
853 if ( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
854 label_series is not None |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
855 and ptypes.is_numeric_dtype(label_series.dtype) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
856 and label_series.nunique() > 10 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
857 ): |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
858 task_type = "regression" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
859 else: |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
860 task_type = "classification" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
861 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
862 config_params["task_type"] = task_type |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
863 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
864 image_feat: Dict[str, Any] = { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
865 "name": IMAGE_PATH_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
866 "type": "image", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
867 } |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
868 # Set preprocessing dimensions FIRST for MetaFormer models |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
869 if is_metaformer: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
870 if metaformer_resize is None: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
871 metaformer_resize = (224, 224) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
872 height, width = metaformer_resize |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
873 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
874 # CRITICAL: Set preprocessing dimensions FIRST for MetaFormer models |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
875 # This is essential for MetaFormer models to work properly |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
876 if "preprocessing" not in image_feat: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
877 image_feat["preprocessing"] = {} |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
878 image_feat["preprocessing"]["height"] = height |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
879 image_feat["preprocessing"]["width"] = width |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
880 # Use infer_image_dimensions=True to allow Ludwig to read images for validation |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
881 # but set explicit max dimensions to control the output size |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
882 image_feat["preprocessing"]["infer_image_dimensions"] = True |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
883 image_feat["preprocessing"]["infer_image_max_height"] = height |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
884 image_feat["preprocessing"]["infer_image_max_width"] = width |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
885 image_feat["preprocessing"]["num_channels"] = metaformer_channels |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
886 image_feat["preprocessing"]["resize_method"] = "interpolate" # Use interpolation for better quality |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
887 image_feat["preprocessing"]["standardize_image"] = "imagenet1k" # Use ImageNet standardization |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
888 # Force Ludwig to respect our dimensions by setting additional parameters |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
889 image_feat["preprocessing"]["requires_equal_dimensions"] = False |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
890 logger.info(f"Set preprocessing dimensions for MetaFormer: {height}x{width} (infer_dimensions=True with max dimensions to allow validation)") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
891 # Now set the encoder configuration |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
892 image_feat["encoder"] = encoder_config |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
893 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
894 if config_params.get("augmentation") is not None: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
895 image_feat["augmentation"] = config_params["augmentation"] |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
896 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
897 # Add resize configuration for standard encoders (ResNet, etc.) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
898 # FIXED: MetaFormer models now respect user dimensions completely |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
899 # Previously there was a double resize issue where MetaFormer would force 224x224 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
900 # Now both MetaFormer and standard encoders respect user's resize choice |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
901 if (not is_metaformer) and config_params.get("image_resize") and config_params["image_resize"] != "original": |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
902 try: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
903 dimensions = config_params["image_resize"].split("x") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
904 if len(dimensions) == 2: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
905 height, width = int(dimensions[0]), int(dimensions[1]) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
906 if height <= 0 or width <= 0: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
907 raise ValueError( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
908 f"Image resize must be positive integers, received {config_params['image_resize']}." |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
909 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
910 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
911 # Add resize to preprocessing for standard encoders |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
912 if "preprocessing" not in image_feat: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
913 image_feat["preprocessing"] = {} |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
914 image_feat["preprocessing"]["height"] = height |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
915 image_feat["preprocessing"]["width"] = width |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
916 # Use infer_image_dimensions=True to allow Ludwig to read images for validation |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
917 # but set explicit max dimensions to control the output size |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
918 image_feat["preprocessing"]["infer_image_dimensions"] = True |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
919 image_feat["preprocessing"]["infer_image_max_height"] = height |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
920 image_feat["preprocessing"]["infer_image_max_width"] = width |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
921 logger.info(f"Added resize preprocessing: {height}x{width} for standard encoder with infer_image_dimensions=True and max dimensions") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
922 except (ValueError, IndexError): |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
923 logger.warning(f"Invalid image resize format: {config_params['image_resize']}, skipping resize preprocessing") |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
924 if task_type == "regression": |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
925 output_feat = { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
926 "name": LABEL_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
927 "type": "number", |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
928 "decoder": {"type": "regressor", "input_size": 1}, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
929 "loss": {"type": "mean_squared_error"}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
930 "evaluation": { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
931 "metrics": [ |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
932 "mean_squared_error", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
933 "mean_absolute_error", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
934 "r2", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
935 ] |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
936 }, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
937 } |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
938 val_metric = config_params.get("validation_metric", "mean_squared_error") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
939 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
940 else: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
941 num_unique_labels = ( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
942 label_series.nunique() if label_series is not None else 2 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
943 ) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
944 output_type = "binary" if num_unique_labels == 2 else "category" |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
945 # Determine if this is regression or classification based on label type |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
946 is_regression = ( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
947 label_series is not None |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
948 and ptypes.is_numeric_dtype(label_series.dtype) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
949 and label_series.nunique() > 10 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
950 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
951 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
952 if is_regression: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
953 output_feat = { |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
954 "name": LABEL_COLUMN_NAME, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
955 "type": "number", |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
956 "decoder": {"type": "regressor", "input_size": 1}, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
957 "loss": {"type": "mean_squared_error"}, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
958 } |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
959 else: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
960 if num_unique_labels == 2: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
961 output_feat = { |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
962 "name": LABEL_COLUMN_NAME, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
963 "type": "binary", |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
964 "decoder": {"type": "classifier", "input_size": 1}, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
965 "loss": {"type": "softmax_cross_entropy"}, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
966 } |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
967 else: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
968 output_feat = { |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
969 "name": LABEL_COLUMN_NAME, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
970 "type": "category", |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
971 "decoder": {"type": "classifier", "input_size": num_unique_labels}, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
972 "loss": {"type": "softmax_cross_entropy"}, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
973 } |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
974 if output_type == "binary" and config_params.get("threshold") is not None: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
975 output_feat["threshold"] = float(config_params["threshold"]) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
976 val_metric = None |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
977 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
978 conf: Dict[str, Any] = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
979 "model_type": "ecd", |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
980 "input_features": [image_feat], |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
981 "output_features": [output_feat], |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
982 "combiner": {"type": "concat"}, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
983 "trainer": { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
984 "epochs": epochs, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
985 "early_stop": early_stop, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
986 "batch_size": batch_size_cfg, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
987 "learning_rate": learning_rate, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
988 # only set validation_metric for regression |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
989 **({"validation_metric": val_metric} if val_metric else {}), |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
990 }, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
991 "preprocessing": { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
992 "split": split_config, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
993 "num_processes": num_processes, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
994 "in_memory": False, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
995 }, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
996 } |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
997 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
998 logger.debug("LudwigDirectBackend: Config dict built.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
999 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1000 yaml_str = yaml.dump(conf, sort_keys=False, indent=2) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1001 logger.info("LudwigDirectBackend: YAML config generated.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1002 return yaml_str |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1003 except Exception: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1004 logger.error( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1005 "LudwigDirectBackend: Failed to serialize YAML.", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1006 exc_info=True, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1007 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1008 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1009 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
def run_experiment(
    self,
    dataset_path: Path,
    config_path: Path,
    output_dir: Path,
    random_seed: int = 42,
) -> None:
    """Run a Ludwig experiment through ``ludwig.experiment.experiment_cli``.

    Args:
        dataset_path: Dataset location; handed to Ludwig as a string.
        config_path: Ludwig config location; handed to Ludwig as a string.
        output_dir: Directory that receives the results. It is created
            (including parents) when missing. A ``Path`` or a plain string
            path is accepted.
        random_seed: Seed forwarded to ``experiment_cli``.

    Raises:
        RuntimeError: If ``ludwig.experiment`` cannot be imported, or if the
            ``experiment_cli`` call raises ``TypeError``.
        Exception: Any other failure during the experiment is logged and
            re-raised unchanged.
    """
    logger.info("LudwigDirectBackend: Starting experiment execution.")

    # Imported lazily so a missing Ludwig install surfaces here as a
    # RuntimeError rather than at module import time.
    try:
        from ludwig.experiment import experiment_cli
    except ImportError as e:
        logger.exception("LudwigDirectBackend: Could not import experiment_cli.")
        raise RuntimeError("Ludwig import failed.") from e

    # Coerce to Path so string paths work too, the same way
    # get_training_process and convert_parquet_to_csv treat output_dir.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    try:
        experiment_cli(
            dataset=str(dataset_path),
            config=str(config_path),
            output_directory=str(output_dir),
            random_seed=random_seed,
            skip_preprocessing=True,
        )
        logger.info(
            f"LudwigDirectBackend: Experiment completed. Results in {output_dir}"
        )
    except TypeError as e:
        # NOTE: the try covers the whole call, so a TypeError raised anywhere
        # inside Ludwig is also reported as an argument mismatch.
        logger.exception(
            "LudwigDirectBackend: Argument mismatch in experiment_cli call."
        )
        raise RuntimeError("Ludwig argument error.") from e
    except Exception:
        logger.exception("LudwigDirectBackend: Experiment execution error.")
        raise
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1054 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
def get_training_process(self, output_dir) -> Optional[Dict[str, Any]]:
    """Report the training settings recorded by the newest Ludwig run.

    Searches ``output_dir`` for entries matching ``experiment_run*``, takes
    the one with the latest modification time and reads
    ``model/training_progress.json`` beneath it.

    Args:
        output_dir: Directory holding the experiment runs (anything accepted
            by ``Path``).

    Returns:
        A dict with ``learning_rate``, ``batch_size`` and ``epoch`` (each
        ``None`` when the key is absent from the file); ``None`` when no run
        entry or no progress file exists; an empty dict when the progress
        file cannot be read or parsed.
    """
    output_dir = Path(output_dir)

    candidates = list(output_dir.glob("experiment_run*"))
    if len(candidates) == 0:
        logger.warning(f"No experiment run directories found in {output_dir}")
        return None

    # Oldest first, so the final element is the most recently modified run.
    candidates.sort(key=lambda run: run.stat().st_mtime)
    newest_run = candidates[-1]

    progress_file = newest_run / "model" / "training_progress.json"
    if progress_file.exists():
        try:
            with progress_file.open("r", encoding="utf-8") as handle:
                data = json.load(handle)
            wanted = ("learning_rate", "batch_size", "epoch")
            return {field: data.get(field) for field in wanted}
        except Exception as e:
            # Best effort: unreadable or malformed progress data yields {}.
            logger.warning(f"Failed to read training progress info: {e}")
            return {}

    logger.warning(f"No training_progress.json found in {progress_file}")
    return None
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1083 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
def convert_parquet_to_csv(self, output_dir: Path):
    """Write a CSV copy of the newest run's predictions Parquet file.

    The most recently modified ``experiment_run*`` entry under
    ``output_dir`` is selected and its predictions Parquet file is saved
    next to it as ``predictions.csv``. This method never raises for the
    expected failure modes: a missing run directory, a missing Parquet
    file, or a failed conversion is only logged.

    Args:
        output_dir: Directory that holds the ``experiment_run*`` entries.
    """
    root = Path(output_dir)

    # Oldest first, so the last element is the most recently modified run.
    runs = list(root.glob("experiment_run*"))
    runs.sort(key=lambda run: run.stat().st_mtime)
    if not runs:
        logger.warning(f"No experiment run dirs found in {root}")
        return

    latest_run = runs[-1]
    source = latest_run / PREDICTIONS_PARQUET_FILE_NAME
    target = latest_run / "predictions.csv"

    if source.exists():
        try:
            pd.read_parquet(source).to_csv(target, index=False)
            logger.info(f"Converted Parquet to CSV: {target}")
        except Exception as err:
            # Best effort: a failed conversion must not abort the pipeline.
            logger.error(f"Error converting Parquet to CSV: {err}")
        return

    # No predictions were produced for this run; nothing to convert.
    logger.info(f"Predictions parquet file not found at {source}, skipping conversion")
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1109 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
def generate_plots(self, output_dir: Path) -> None:
    """Generate the supported Ludwig visualizations for the latest experiment run.

    The most recently modified ``experiment_run*`` entry under
    ``output_dir`` is used. Only visualizations whose registry name appears
    in the train or test allow-lists below are invoked; every other
    registered visualization is skipped. Plots are written as PNG files to
    ``<run>/visualizations/train`` and ``<run>/visualizations/test``.

    A visualization that raises is logged as skipped and does not stop the
    remaining ones. If no run directory exists, a warning is logged and the
    method returns without creating anything.

    Args:
        output_dir: Directory that holds the ``experiment_run*`` entries.
    """
    logger.info("Generating all Ludwig visualizations…")

    test_plots = {
        "compare_performance",
        "compare_classifiers_performance_from_prob",
        "compare_classifiers_performance_from_pred",
        "compare_classifiers_performance_changing_k",
        "compare_classifiers_multiclass_multimetric",
        "compare_classifiers_predictions",
        "confidence_thresholding_2thresholds_2d",
        "confidence_thresholding_2thresholds_3d",
        "confidence_thresholding",
        "confidence_thresholding_data_vs_acc",
        "binary_threshold_vs_metric",
        "roc_curves",
        "roc_curves_from_test_statistics",
        "calibration_1_vs_all",
        "calibration_multiclass",
        "confusion_matrix",
        "frequency_vs_f1",
    }
    train_plots = {
        "learning_curves",
        "compare_classifiers_performance_subset",
    }

    output_dir = Path(output_dir)
    # Sorted oldest first, so the last entry is the newest run.
    exp_dirs = sorted(
        output_dir.glob("experiment_run*"),
        key=lambda p: p.stat().st_mtime,
    )
    if not exp_dirs:
        logger.warning(f"No experiment run dirs found in {output_dir}")
        return
    exp_dir = exp_dirs[-1]

    viz_dir = exp_dir / "visualizations"
    viz_dir.mkdir(exist_ok=True)
    train_viz = viz_dir / "train"
    test_viz = viz_dir / "test"
    train_viz.mkdir(parents=True, exist_ok=True)
    test_viz.mkdir(parents=True, exist_ok=True)

    def _check(p: Path) -> Optional[str]:
        """Return ``str(p)`` if the path exists, otherwise ``None``."""
        return str(p) if p.exists() else None

    training_stats = _check(exp_dir / "training_statistics.json")
    test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME)
    probs_path = _check(exp_dir / PREDICTIONS_PARQUET_FILE_NAME)
    gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME)

    # Load the run description once; an absent file leaves cfg empty so the
    # lookups below fall through to their defaults.
    cfg = {}
    desc = exp_dir / DESCRIPTION_FILE_NAME
    if desc.exists():
        with open(desc, "r") as f:
            cfg = json.load(f)

    dataset_path = None
    split_file = None
    dataset = cfg.get("dataset")
    # Path("") is the current directory, which always exists; a missing or
    # null dataset entry must therefore not be probed as a path.
    if isinstance(dataset, str) and dataset:
        dataset_path = _check(Path(dataset))
        split_file = _check(Path(get_split_path(dataset)))

    output_feature = ""
    try:
        output_feature = cfg["config"]["output_features"][0]["name"]
    except (KeyError, IndexError, TypeError) as e:
        # Not fatal: the test statistics file is tried next.
        logger.debug(f"Output feature name not found in run description: {e!r}")
    if not output_feature and test_stats:
        # Fall back to the first top-level key of the test statistics file.
        with open(test_stats, "r") as f:
            stats = json.load(f)
        output_feature = next(iter(stats.keys()), "")

    viz_registry = get_visualizations_registry()
    for viz_name, viz_func in viz_registry.items():
        if viz_name in train_plots:
            viz_dir_plot = train_viz
        elif viz_name in test_plots:
            viz_dir_plot = test_viz
        else:
            # Registered but not in either allow-list.
            continue

        try:
            viz_func(
                training_statistics=[training_stats] if training_stats else [],
                test_statistics=[test_stats] if test_stats else [],
                probabilities=[probs_path] if probs_path else [],
                output_feature_name=output_feature,
                # NOTE(review): presumably 2 selects the test split — confirm
                # against the Ludwig visualization API.
                ground_truth_split=2,
                top_n_classes=[0],
                top_k=3,
                ground_truth_metadata=gt_metadata,
                ground_truth=dataset_path,
                split_file=split_file,
                output_directory=str(viz_dir_plot),
                normalize=False,
                file_format="png",
            )
            logger.info(f"✔ Generated {viz_name}")
        except Exception as e:
            # Best effort: one failing plot must not block the others.
            logger.warning(f"✘ Skipped {viz_name}: {e}")

    logger.info(f"All visualizations written to {viz_dir}")
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1213 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1214 def generate_html_report( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1215 self, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1216 title: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1217 output_dir: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1218 config: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1219 split_info: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1220 ) -> Path: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1221 """Assemble an HTML report from visualizations under train_val/ and test/ folders.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1222 cwd = Path.cwd() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1223 report_name = title.lower().replace(" ", "_") + "_report.html" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1224 report_path = cwd / report_name |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1225 output_dir = Path(output_dir) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1226 output_type = None |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1227 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1228 exp_dirs = sorted( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1229 output_dir.glob("experiment_run*"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1230 key=lambda p: p.stat().st_mtime, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1231 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1232 if not exp_dirs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1233 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1234 exp_dir = exp_dirs[-1] |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1235 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1236 base_viz_dir = exp_dir / "visualizations" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1237 train_viz_dir = base_viz_dir / "train" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1238 test_viz_dir = base_viz_dir / "test" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1239 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1240 html = get_html_template() |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1241 |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1242 # Extra CSS & JS: center Plotly and enable CSV download for predictions table |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1243 html += """ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1244 <style> |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1245 /* Center Plotly figures (both wrapper and native classes) */ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1246 .plotly-center { display: flex; justify-content: center; } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1247 .plotly-center .plotly-graph-div, .plotly-center .js-plotly-plot { margin: 0 auto !important; } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1248 .js-plotly-plot, .plotly-graph-div { margin-left: auto !important; margin-right: auto !important; } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1249 |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1250 /* Download button for predictions table */ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1251 .download-btn { |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1252 padding: 8px 12px; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1253 border: 1px solid #4CAF50; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1254 background: #4CAF50; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1255 color: white; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1256 border-radius: 6px; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1257 cursor: pointer; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1258 } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1259 .download-btn:hover { filter: brightness(0.95); } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1260 .preds-controls { |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1261 display: flex; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1262 justify-content: flex-end; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1263 gap: 8px; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1264 margin: 8px 0; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1265 } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1266 </style> |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1267 <script> |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1268 function tableToCSV(table){ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1269 const rows = Array.from(table.querySelectorAll('tr')); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1270 return rows.map(row => |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1271 Array.from(row.querySelectorAll('th,td')).map(cell => { |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1272 let text = cell.innerText.replace(/\\r?\\n|\\r/g,' ').trim(); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1273 if (text.includes('"') || text.includes(',')) { |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1274 text = '"' + text.replace(/"/g,'""') + '"'; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1275 } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1276 return text; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1277 }).join(',') |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1278 ).join('\\n'); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1279 } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1280 document.addEventListener('DOMContentLoaded', function(){ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1281 const btn = document.getElementById('downloadPredsCsv'); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1282 if(btn){ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1283 btn.addEventListener('click', function(){ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1284 const tbl = document.querySelector('.predictions-table'); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1285 if(!tbl){ alert('Predictions table not found.'); return; } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1286 const csv = tableToCSV(tbl); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1287 const blob = new Blob([csv], {type: 'text/csv;charset=utf-8;'}); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1288 const url = URL.createObjectURL(blob); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1289 const a = document.createElement('a'); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1290 a.href = url; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1291 a.download = 'ground_truth_vs_predictions.csv'; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1292 document.body.appendChild(a); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1293 a.click(); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1294 document.body.removeChild(a); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1295 URL.revokeObjectURL(url); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1296 }); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1297 } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1298 }); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1299 </script> |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1300 """ |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1301 html += f"<h1>{title}</h1>" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1302 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1303 metrics_html = "" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1304 train_val_metrics_html = "" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1305 test_metrics_html = "" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1306 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1307 train_stats_path = exp_dir / "training_statistics.json" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1308 test_stats_path = exp_dir / TEST_STATISTICS_FILE_NAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1309 if train_stats_path.exists() and test_stats_path.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1310 with open(train_stats_path) as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1311 train_stats = json.load(f) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1312 with open(test_stats_path) as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1313 test_stats = json.load(f) |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1314 output_type = detect_output_type(test_stats) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1315 metrics_html = format_stats_table_html(train_stats, test_stats, output_type) |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1316 train_val_metrics_html = format_train_val_stats_table_html( |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1317 train_stats, test_stats |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1318 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1319 test_metrics_html = format_test_merged_stats_table_html( |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1320 extract_metrics_from_json(train_stats, test_stats, output_type)[ |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1321 "test" |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1322 ], output_type |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1323 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1324 except Exception as e: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1325 logger.warning( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1326 f"Could not load stats for HTML report: {type(e).__name__}: {e}" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1327 ) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1328 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1329 config_html = "" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1330 training_progress = self.get_training_process(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1331 try: |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1332 config_html = format_config_table_html( |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1333 config, split_info, training_progress, output_type |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1334 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1335 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1336 logger.warning(f"Could not load config for HTML report: {e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1337 |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1338 # ---------- image rendering with exclusions ---------- |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
def render_img_section(
    title: str,
    dir_path: Path,
    output_type: Optional[str] = None,
    exclude_names: Optional[set] = None,
) -> str:
    """Render the PNG plots found in ``dir_path`` as inline HTML.

    Each remaining image is embedded as a base64 ``data:`` URI under a
    centered ``<h2>`` heading derived from its file name (underscores
    become spaces, then title-cased). Images are emitted in file-name
    order so the report layout is stable between runs.

    Args:
        title: Section heading. It is only rendered in the fallback
            messages (missing directory / no plots).
        dir_path: Directory that is searched (non-recursively) for
            ``*.png`` files.
        output_type: Currently unused; accepted so existing callers that
            pass it positionally keep working.
        exclude_names: Extra file names to skip, on top of the built-in
            exclusion list below. ``None`` means no extra exclusions.

    Returns:
        An HTML fragment: either the rendered plots or a short
        "Directory not found." / "No plots found." notice.
    """
    # Imported locally and under an alias on purpose: the enclosing scope
    # binds a *string* variable named ``html``, which would shadow a
    # module-level ``import html`` inside this closure.
    from html import escape as _escape_html

    safe_title = _escape_html(title)

    if not dir_path.exists():
        return f"<h2>{safe_title}</h2><p><em>Directory not found.</em></p>"

    # Built-in exclusions: standard confusion matrices and the non-top5
    # entropy variants. NOTE: "roc_curves.png" is intentionally NOT in
    # this set (its entry was disabled), so ROC curves are still shown.
    default_exclude = {
        "confusion_matrix__label_top5.png",  # standard confusion matrix
        "confusion_matrix__label_top10.png",  # duplicate
        "confusion_matrix__label_top6.png",  # duplicate
        "confusion_matrix_entropy__label_top10.png",  # keep only top5
        "confusion_matrix_entropy__label_top6.png",  # keep only top5
    }
    skipped = default_exclude | set(exclude_names or ())

    # Filter and sort in one pass; sorting by name keeps ordering
    # consistent for both string and numeric labels.
    imgs = sorted(
        (img for img in dir_path.glob("*.png") if img.name not in skipped),
        key=lambda img: img.name,
    )

    if not imgs:
        return f"<h2>{safe_title}</h2><p><em>No plots found.</em></p>"

    parts = []
    for img in imgs:
        b64 = encode_image_to_base64(str(img))
        # File stems can contain user-provided label names, so escape
        # them before placing them in the markup.
        img_title = _escape_html(img.stem.replace("_", " ").title())
        parts.append(
            f"<h2 style='text-align: center;'>{img_title}</h2>"
            f'<div class="plot" style="margin-bottom:20px;text-align:center;">'
            f'<img src="data:image/png;base64,{b64}" '
            f'style="max-width:90%;max-height:600px;border:1px solid #ddd;" />'
            f"</div>"
        )
    return "".join(parts)
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1387 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1388 tab1_content = config_html + metrics_html |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1389 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1390 tab2_content = train_val_metrics_html + render_img_section( |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1391 "Training and Validation Visualizations", |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1392 train_viz_dir, |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1393 output_type, |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1394 exclude_names={ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1395 "compare_classifiers_performance_from_prob.png", |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1396 "roc_curves_from_prediction_statistics.png", |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1397 "precision_recall_curves_from_prediction_statistics.png", |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1398 "precision_recall_curve.png", |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1399 }, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1400 ) |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1401 |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1402 # --- Predictions vs Ground Truth table (REGRESSION ONLY) --- |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1403 preds_section = "" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1404 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1405 if output_type == "regression" and parquet_path.exists(): |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1406 try: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1407 # 1) load predictions from Parquet |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1408 df_preds = pd.read_parquet(parquet_path).reset_index(drop=True) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1409 # assume the column containing your model's prediction is named "prediction" |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1410 # or contains that substring: |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1411 pred_col = next( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1412 (c for c in df_preds.columns if "prediction" in c.lower()), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1413 None, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1414 ) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1415 if pred_col is None: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1416 raise ValueError("No prediction column found in Parquet output") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1417 df_pred = df_preds[[pred_col]].rename(columns={pred_col: "prediction"}) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1418 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1419 # 2) load ground truth for the test split from prepared CSV |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1420 df_all = pd.read_csv(config["label_column_data_path"]) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1421 df_gt = df_all[df_all[SPLIT_COLUMN_NAME] == 2][ |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1422 LABEL_COLUMN_NAME |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1423 ].reset_index(drop=True) |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1424 # 3) concatenate side-by-side |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1425 df_table = pd.concat([df_gt, df_pred], axis=1) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1426 df_table.columns = [LABEL_COLUMN_NAME, "prediction"] |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1427 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1428 # 4) render as HTML |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1429 preds_html = df_table.to_html(index=False, classes="predictions-table") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1430 preds_section = ( |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1431 "<h2 style='text-align: center;'>Ground Truth vs. Predictions</h2>" |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1432 "<div class='preds-controls'>" |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1433 "<button id='downloadPredsCsv' class='download-btn'>Download CSV</button>" |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1434 "</div>" |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1435 "<div class='scroll-rows-30' style='overflow-x:auto; overflow-y:auto; max-height:900px; margin-bottom:20px;'>" |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1436 + preds_html |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1437 + "</div>" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1438 ) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1439 except Exception as e: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1440 logger.warning(f"Could not build Predictions vs GT table: {e}") |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1441 |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1442 tab3_content = test_metrics_html + preds_section |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1443 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1444 if output_type in ("binary", "category") and test_stats_path.exists(): |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1445 try: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1446 interactive_plots = build_classification_plots( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1447 str(test_stats_path), |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1448 str(train_stats_path) if train_stats_path.exists() else None, |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1449 ) |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1450 for plot in interactive_plots: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1451 tab3_content += ( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1452 f"<h2 style='text-align: center;'>{plot['title']}</h2>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1453 f"<div class='plotly-center'>{plot['html']}</div>" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1454 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1455 logger.info(f"Generated {len(interactive_plots)} interactive Plotly plots") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1456 except Exception as e: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1457 logger.warning(f"Could not generate Plotly plots: {e}") |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1458 |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1459 # Add static TEST PNGs (with default dedupe/exclusions) |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1460 tab3_content += render_img_section( |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1461 "Test Visualizations", test_viz_dir, output_type |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1462 ) |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1463 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1464 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1465 modal_html = get_metrics_help_modal() |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1466 html += tabbed_html + modal_html + get_html_closing() |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1467 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1468 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1469 with open(report_path, "w") as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1470 f.write(html) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1471 logger.info(f"HTML report generated at: {report_path}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1472 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1473 logger.error(f"Failed to write HTML report: {e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1474 raise |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1475 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1476 return report_path |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1477 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1478 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1479 class WorkflowOrchestrator: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1480 """Manages the image-classification workflow.""" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1481 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1482 def __init__(self, args: argparse.Namespace, backend: Backend): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1483 self.args = args |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1484 self.backend = backend |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1485 self.temp_dir: Optional[Path] = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1486 self.image_extract_dir: Optional[Path] = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1487 logger.info(f"Orchestrator initialized with backend: {type(backend).__name__}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1488 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1489 def run(self) -> None: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1490 """Execute the full workflow end-to-end.""" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1491 # Delegate to the backend's run_experiment method |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1492 self.backend.run_experiment() |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1493 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1494 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1495 class ImageLearnerCLI: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1496 """Manages the image-classification workflow.""" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1497 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1498 def __init__(self, args: argparse.Namespace, backend: Backend): |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1499 self.args = args |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1500 self.backend = backend |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1501 self.temp_dir: Optional[Path] = None |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1502 self.image_extract_dir: Optional[Path] = None |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1503 logger.info(f"Orchestrator initialized with backend: {type(backend).__name__}") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1504 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1505 def _create_temp_dirs(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1506 """Create temporary output and image extraction directories.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1507 try: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1508 self.temp_dir = Path( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1509 tempfile.mkdtemp(dir=self.args.output_dir, prefix=TEMP_DIR_PREFIX) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1510 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1511 self.image_extract_dir = self.temp_dir / "images" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1512 self.image_extract_dir.mkdir() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1513 logger.info(f"Created temp directory: {self.temp_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1514 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1515 logger.error("Failed to create temporary directories", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1516 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1517 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1518 def _extract_images(self) -> None: |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1519 """Extract images into the temp image directory. |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1520 - If a ZIP file is provided, extract it |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1521 - If a directory is provided, copy its contents |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1522 """ |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1523 if self.image_extract_dir is None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1524 raise RuntimeError("Temp image directory not initialized.") |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1525 src = Path(self.args.image_zip) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1526 logger.info(f"Preparing images from {src} → {self.image_extract_dir}") |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1527 try: |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1528 if src.is_dir(): |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1529 # copy directory tree |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1530 for root, dirs, files in os.walk(src): |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1531 rel = Path(root).relative_to(src) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1532 target_root = self.image_extract_dir / rel |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1533 target_root.mkdir(parents=True, exist_ok=True) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1534 for fn in files: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1535 shutil.copy2(Path(root) / fn, target_root / fn) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1536 logger.info("Image directory copied.") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1537 else: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1538 with zipfile.ZipFile(src, "r") as z: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1539 z.extractall(self.image_extract_dir) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1540 logger.info("Image extraction complete.") |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1541 except Exception: |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1542 logger.error("Error preparing images", exc_info=True) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1543 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1544 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1545 def _process_fixed_split( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1546 self, df: pd.DataFrame |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1547 ) -> Tuple[pd.DataFrame, Dict[str, Any], str]: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1548 """Process datasets that already have a split column.""" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1549 unique = set(df[SPLIT_COLUMN_NAME].unique()) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1550 if unique == {0, 2}: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1551 # Split 0/2 detected, create validation set |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1552 df = split_data_0_2( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1553 df=df, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1554 split_column=SPLIT_COLUMN_NAME, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1555 validation_size=self.args.validation_size, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1556 random_state=self.args.random_seed, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1557 label_column=LABEL_COLUMN_NAME, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1558 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1559 split_config = {"type": "fixed", "column": SPLIT_COLUMN_NAME} |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1560 split_info = ( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1561 "Detected a split column (with values 0 and 2) in the input CSV. " |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1562 f"Used this column as a base and reassigned " |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1563 f"{self.args.validation_size * 100:.1f}% " |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1564 "of the training set (originally labeled 0) to validation (labeled 1) using stratified sampling." |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1565 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1566 logger.info("Applied custom 0/2 split.") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1567 elif unique.issubset({0, 1, 2}): |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1568 # Standard 0/1/2 split |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1569 split_config = {"type": "fixed", "column": SPLIT_COLUMN_NAME} |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1570 split_info = ( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1571 "Detected a split column with train(0)/validation(1)/test(2) " |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1572 "values in the input CSV. Used this column as-is." |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1573 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1574 logger.info("Fixed split column detected.") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1575 else: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1576 raise ValueError( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1577 f"Split column contains unexpected values: {unique}. " |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1578 "Expected: {{0,1,2}} or {{0,2}}" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1579 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1580 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1581 return df, split_config, split_info |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1582 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
def _prepare_data(self) -> Tuple[Path, Dict[str, Any], str]:
    """Load the input CSV, rewrite image paths, assign splits, and save it.

    The image-path column is prefixed with ``images/``. If the CSV already
    has a split column it is processed by ``_process_fixed_split``;
    otherwise a stratified random split column is created from
    ``self.args.split_probabilities``.

    Returns:
        A tuple ``(final_csv, split_config, split_info)``: the path of the
        prepared CSV written inside ``self.temp_dir``, the split
        configuration dict, and a human-readable description of how the
        split was obtained.

    Raises:
        RuntimeError: If the temp directories have not been initialized.
        ValueError: If the CSV lacks the image-path or label column.
    """
    if not self.temp_dir or not self.image_extract_dir:
        raise RuntimeError("Temp dirs not initialized before data prep.")

    try:
        df = pd.read_csv(self.args.csv_file)
        logger.info(f"Loaded CSV: {self.args.csv_file}")
    except Exception:
        logger.error("Error loading CSV file", exc_info=True)
        raise

    required = {IMAGE_PATH_COLUMN_NAME, LABEL_COLUMN_NAME}
    missing = required - set(df.columns)
    if missing:
        # Sets are unordered; sort so the error message is reproducible.
        raise ValueError(f"Missing CSV columns: {', '.join(sorted(missing))}")

    try:
        # Use relative paths that Ludwig can resolve from its internal working directory
        df[IMAGE_PATH_COLUMN_NAME] = df[IMAGE_PATH_COLUMN_NAME].apply(
            lambda p: str(Path("images") / p)
        )
    except Exception:
        logger.error("Error updating image paths", exc_info=True)
        raise

    if SPLIT_COLUMN_NAME in df.columns:
        df, split_config, split_info = self._process_fixed_split(df)
    else:
        logger.info("No split column; creating stratified random split")
        df = create_stratified_random_split(
            df=df,
            split_column=SPLIT_COLUMN_NAME,
            split_probabilities=self.args.split_probabilities,
            random_state=self.args.random_seed,
            label_column=LABEL_COLUMN_NAME,
        )
        split_config = {
            "type": "fixed",
            "column": SPLIT_COLUMN_NAME,
        }
        # round() rather than int(): int(0.29 * 100) is 28 because of float
        # representation error, which would misreport the requested split.
        percentages = [round(p * 100) for p in self.args.split_probabilities]
        split_info = (
            "No split column in CSV. Created stratified random split: "
            f"{percentages}% "
            "for train/val/test with balanced label distribution."
        )

    final_csv = self.temp_dir / TEMP_CSV_FILENAME

    try:
        df.to_csv(final_csv, index=False)
        logger.info(f"Saved prepared data to {final_csv}")
    except Exception:
        logger.error("Error saving prepared CSV", exc_info=True)
        raise

    return final_csv, split_config, split_info
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1641 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1642 # Removed duplicate method |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1643 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
def _detect_image_dimensions(self) -> Tuple[int, int]:
    """Detect image dimensions from the first image in the zip archive.

    Only the first ``.png``/``.jpg``/``.jpeg`` entry (matched
    case-insensitively) of ``self.args.image_zip`` is inspected. Detection
    is best-effort: any exception is logged as a warning and the default
    size is returned instead of propagating.

    Returns:
        ``(height, width)`` of the probed image, or ``(224, 224)`` when no
        zip was provided, the zip has no matching entry, or probing failed.
    """
    default_dims = (224, 224)
    try:
        import io

        from PIL import Image

        # Check if image_zip is provided
        if not self.args.image_zip:
            logger.warning("No image zip provided, using default 224x224")
            return default_dims

        # Read the raw bytes of the first image, then release the archive.
        with zipfile.ZipFile(self.args.image_zip, "r") as archive:
            image_names = [
                name
                for name in archive.namelist()
                if name.lower().endswith((".png", ".jpg", ".jpeg"))
            ]
            if not image_names:
                logger.warning("No image files found in zip, using default 224x224")
                return default_dims

            with archive.open(image_names[0]) as member:
                payload = member.read()

        # Open as a context manager so the PIL image is closed once its
        # size has been read instead of lingering until garbage collection.
        with Image.open(io.BytesIO(payload)) as img:
            width, height = img.size

        logger.info(f"Detected image dimensions: {width}x{height}")
        return height, width  # Return as (height, width) to match encoder config

    except Exception as e:
        logger.warning(f"Error detecting image dimensions: {e}, using default 224x224")
        return default_dims
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1673 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
def _cleanup_temp_dirs(self) -> None:
    """Delete the temp directory tree, if present, and reset the handles.

    Removal uses ``ignore_errors=True``, so failures while deleting are not
    raised. ``self.temp_dir`` and ``self.image_extract_dir`` are set to
    ``None`` afterwards.
    """
    tmp_root = self.temp_dir
    if tmp_root and tmp_root.exists():
        logger.info(f"Cleaning up temp directory: {tmp_root}")
        shutil.rmtree(tmp_root, ignore_errors=True)

    # Drop both references so later code cannot reuse stale paths.
    self.temp_dir = None
    self.image_extract_dir = None
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1681 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1682 def run(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1683 """Execute the full workflow end-to-end.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1684 logger.info("Starting workflow...") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1685 self.args.output_dir.mkdir(parents=True, exist_ok=True) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1686 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1687 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1688 self._create_temp_dirs() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1689 self._extract_images() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1690 csv_path, split_cfg, split_info = self._prepare_data() |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1691 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1692 use_pretrained = self.args.use_pretrained or self.args.fine_tune |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1693 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1694 backend_args = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1695 "model_name": self.args.model_name, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1696 "fine_tune": self.args.fine_tune, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1697 "use_pretrained": use_pretrained, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1698 "epochs": self.args.epochs, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1699 "batch_size": self.args.batch_size, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1700 "preprocessing_num_processes": self.args.preprocessing_num_processes, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1701 "split_probabilities": self.args.split_probabilities, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1702 "learning_rate": self.args.learning_rate, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1703 "random_seed": self.args.random_seed, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1704 "early_stop": self.args.early_stop, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1705 "label_column_data_path": csv_path, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1706 "augmentation": self.args.augmentation, |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1707 "image_resize": self.args.image_resize, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1708 "image_zip": self.args.image_zip, |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1709 "threshold": self.args.threshold, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1710 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1711 yaml_str = self.backend.prepare_config(backend_args, split_cfg) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1712 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1713 config_file = self.temp_dir / TEMP_CONFIG_FILENAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1714 config_file.write_text(yaml_str) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1715 logger.info(f"Wrote backend config: {config_file}") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1716 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1717 ran_ok = True |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1718 try: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1719 # Run Ludwig experiment with absolute paths to avoid working directory issues |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1720 self.backend.run_experiment( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1721 csv_path, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1722 config_file, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1723 self.args.output_dir, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1724 self.args.random_seed, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1725 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1726 except Exception: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1727 logger.error("Workflow execution failed", exc_info=True) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1728 ran_ok = False |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1729 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1730 if ran_ok: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1731 logger.info("Workflow completed successfully.") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1732 # Generate a very small set of plots to conserve disk space |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1733 self.backend.generate_plots(self.args.output_dir) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1734 # Build HTML report (robust to missing metrics) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1735 report_file = self.backend.generate_html_report( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1736 "Image Classification Results", |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1737 self.args.output_dir, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1738 backend_args, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1739 split_info, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1740 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1741 logger.info(f"HTML report generated at: {report_file}") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1742 # Convert predictions parquet → csv |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1743 self.backend.convert_parquet_to_csv(self.args.output_dir) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1744 logger.info("Converted Parquet to CSV.") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1745 # Post-process cleanup to reduce disk footprint for subsequent tests |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1746 try: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1747 self._postprocess_cleanup(self.args.output_dir) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1748 except Exception as cleanup_err: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1749 logger.warning(f"Cleanup step failed: {cleanup_err}") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1750 else: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1751 # Fallback: create minimal outputs so downstream steps can proceed |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1752 logger.warning("Falling back to minimal outputs due to runtime failure.") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1753 try: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1754 self._create_minimal_outputs(self.args.output_dir, csv_path) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1755 # Even in fallback, produce an HTML shell so tests find required text |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1756 report_file = self.backend.generate_html_report( |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1757 "Image Classification Results", |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1758 self.args.output_dir, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1759 backend_args, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1760 split_info, |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1761 ) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1762 logger.info(f"HTML report (fallback) generated at: {report_file}") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1763 except Exception as fb_err: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1764 logger.error(f"Failed to build fallback outputs: {fb_err}") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1765 raise |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1766 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1767 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1768 logger.error("Workflow execution failed", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1769 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1770 finally: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1771 self._cleanup_temp_dirs() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1772 |
11
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
def _postprocess_cleanup(self, output_dir: Path) -> None:
    """Remove large intermediates and caches to conserve disk space across tests.

    Best-effort cleanup of the most recently modified ``experiment_run*``
    directory under ``output_dir`` plus the torch hub and huggingface caches.
    Paths that are missing or cannot be removed are skipped.

    Args:
        output_dir: Directory that is searched for ``experiment_run*`` entries.
    """
    log = logging.getLogger(__name__)
    output_dir = Path(output_dir)

    # Sort oldest -> newest by mtime; only directories can hold run artifacts,
    # so a stray file matching the pattern must not be picked as the latest run.
    exp_dirs = sorted(
        (p for p in output_dir.glob("experiment_run*") if p.is_dir()),
        key=lambda p: p.stat().st_mtime,
    )
    if exp_dirs:
        exp_dir = exp_dirs[-1]

        # Remove training checkpoints directory if present
        # (ignore_errors=True already tolerates a missing path).
        shutil.rmtree(exp_dir / "model" / "training_checkpoints", ignore_errors=True)

        # Remove predictions parquet once CSV is generated
        parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
        try:
            parquet_path.unlink()
        except FileNotFoundError:
            pass  # nothing to remove
        except OSError as err:
            # Still best-effort, but leave a trace instead of failing silently.
            log.debug("Could not remove %s: %s", parquet_path, err)

    job_cache = Path.cwd() / "home" / ".cache"
    cache_dirs = [
        # Clear torch hub cache under the job-scoped home, if present
        job_cache / "torch" / "hub",
        # Clear huggingface cache if present in the job sandbox
        job_cache / "huggingface",
    ]

    # Also try the default user cache as a best-effort (may not exist in job sandbox).
    # NOTE(review): outside a sandbox this deletes the invoking user's real
    # torch hub cache -- confirm that is intended.
    try:
        cache_dirs.append(Path.home() / ".cache" / "torch" / "hub")
    except RuntimeError as err:
        # Path.home() raises when the home directory cannot be resolved;
        # that must not abort the rest of the cleanup.
        log.debug("Could not resolve user home directory: %s", err)

    for cache_dir in cache_dirs:
        shutil.rmtree(cache_dir, ignore_errors=True)
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1808 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1809 def _create_minimal_outputs(self, output_dir: Path, prepared_csv_path: Path) -> None: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1810 """Create a minimal set of outputs so Galaxy can collect expected artifacts. |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1811 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1812 - experiment_run/ |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1813 - predictions.csv (1 column) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1814 - visualizations/train/ (empty) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1815 - visualizations/test/ (empty) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1816 - model/ |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1817 - model_weights/ (empty) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1818 - model_hyperparameters.json (stub) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1819 """ |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1820 output_dir = Path(output_dir) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1821 exp_dir = output_dir / "experiment_run" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1822 (exp_dir / "visualizations" / "train").mkdir(parents=True, exist_ok=True) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1823 (exp_dir / "visualizations" / "test").mkdir(parents=True, exist_ok=True) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1824 model_dir = exp_dir / "model" |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1825 (model_dir / "model_weights").mkdir(parents=True, exist_ok=True) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1826 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1827 # Stub JSON so the tool's copy step succeeds |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1828 try: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1829 (model_dir / "model_hyperparameters.json").write_text("{}\n") |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1830 except Exception: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1831 pass |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1832 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1833 # Create a small predictions.csv with exactly 1 column |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1834 try: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1835 df_all = pd.read_csv(prepared_csv_path) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1836 from constants import SPLIT_COLUMN_NAME # local import to avoid cycle at top |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1837 num_rows = int((df_all[SPLIT_COLUMN_NAME] == 2).sum()) if SPLIT_COLUMN_NAME in df_all.columns else 1 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1838 except Exception: |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1839 num_rows = 1 |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1840 num_rows = max(1, num_rows) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1841 pd.DataFrame({"prediction": [0] * num_rows}).to_csv(exp_dir / "predictions.csv", index=False) |
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
goeckslab
parents:
10
diff
changeset
|
1842 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1843 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
def parse_learning_rate(s):
    """Convert a learning-rate argument to ``float``.

    Args:
        s: Value to convert, typically a command-line string.

    Returns:
        The value as a finite ``float``, or ``None`` when ``s`` cannot be
        converted (for example ``None`` or a non-numeric string) or converts
        to NaN or infinity.
    """
    import math  # local import keeps this helper self-contained

    try:
        value = float(s)
    except (TypeError, ValueError, OverflowError):
        return None
    # float() happily parses "nan" and "inf"; neither is a usable learning rate.
    return value if math.isfinite(value) else None
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1849 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1850 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
def aug_parse(aug_string: str) -> list:
    """
    Parse comma-separated augmentation keys into Ludwig augmentation dicts.

    Args:
        aug_string: Comma-separated augmentation names, e.g.
            ``"random_rotate, random_blur"``. Whitespace around names and
            empty tokens (such as a trailing comma) are ignored. ``None`` or
            an empty string yields an empty list.

    Returns:
        One config dict per requested key, in input order. Each entry is a
        separate dict object, even when a key is repeated.

    Raises:
        ValueError: on unknown key.
    """
    mapping = {
        "random_horizontal_flip": {"type": "random_horizontal_flip"},
        "random_vertical_flip": {"type": "random_vertical_flip"},
        "random_rotate": {"type": "random_rotate", "degree": 10},
        "random_blur": {"type": "random_blur", "kernel_size": 3},
        "random_brightness": {"type": "random_brightness", "min": 0.5, "max": 2.0},
        "random_contrast": {"type": "random_contrast", "min": 0.5, "max": 2.0},
    }
    if not aug_string:
        return []

    aug_list = []
    for tok in aug_string.split(","):
        key = tok.strip()
        if not key:
            continue
        if key not in mapping:
            valid = ", ".join(mapping)
            raise ValueError(f"Unknown augmentation '{key}'. Valid choices: {valid}")
        # Copy so a repeated key does not put the same dict object in the
        # list twice (aliased entries would be mutated or serialized together).
        aug_list.append(dict(mapping[key]))
    return aug_list
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1874 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1875 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
class SplitProbAction(argparse.Action):
    """Argparse action that validates the ``--split-probabilities`` triple.

    The option is expected to deliver exactly three numbers (train, val,
    test). They are stored on the namespace unchanged once they pass
    validation; otherwise the parser reports a usage error.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate *values* and store them on *namespace* under ``self.dest``.

        Args:
            parser: The parser invoking this action; ``parser.error`` is used
                to report invalid input (it prints usage and exits).
            namespace: Namespace object that receives the validated values.
            values: Sequence of three numbers: train, val and test fractions.
            option_string: The option string that triggered the action
                (unused).
        """
        train, val, test = values
        # A sum check alone accepts triples such as (1.5, -0.3, -0.2), and NaN
        # slips through it as well because every comparison with NaN is False.
        # ``p >= 0.0`` is False for both negatives and NaN.
        if not all(p >= 0.0 for p in (train, val, test)):
            parser.error(
                "--split-probabilities must be non-negative numbers; "
                f"got {train} {val} {test}"
            )
        total = train + val + test
        # Small tolerance so that e.g. 0.7 + 0.1 + 0.2 is not rejected because
        # of floating-point rounding.
        if abs(total - 1.0) > 1e-6:
            parser.error(
                f"--split-probabilities must sum to 1.0; "
                f"got {train:.3f} + {val:.3f} + {test:.3f} = {total:.3f}"
            )
        setattr(namespace, self.dest, values)
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1886 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1887 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
def main():
    """Command-line entry point: parse arguments, validate them, run the job.

    Builds the argument parser, checks the parsed values (numeric ranges,
    existence of the CSV and of the image ZIP/directory, augmentation names),
    then constructs a ``LudwigDirectBackend`` and an ``ImageLearnerCLI`` and
    calls its ``run()`` method.

    The function never returns normally: it ends the process with
    ``sys.exit(0)`` when ``run()`` completes and ``sys.exit(1)`` when it
    raises an ``Exception``. Invalid arguments are reported through
    ``parser.error`` (usage message, exit status 2).
    """
    parser = argparse.ArgumentParser(
        description="Image Classification Learner with Pluggable Backends",
    )
    parser.add_argument(
        "--csv-file",
        required=True,
        type=Path,
        help="Path to the input CSV",
    )
    parser.add_argument(
        "--image-zip",
        required=True,
        type=Path,
        help="Path to the images ZIP or a directory containing images",
    )
    parser.add_argument(
        "--model-name",
        required=True,
        choices=MODEL_ENCODER_TEMPLATES.keys(),
        help="Which model template to use",
    )
    parser.add_argument(
        "--use-pretrained",
        action="store_true",
        help="Use pretrained weights for the model",
    )
    parser.add_argument(
        "--fine-tune",
        action="store_true",
        help="Enable fine-tuning",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=10,
        help="Number of training epochs",
    )
    parser.add_argument(
        "--early-stop",
        type=int,
        default=5,
        help="Early stopping patience",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        help="Batch size (None = auto)",
    )
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("learner_output"),
        help="Where to write outputs",
    )
    parser.add_argument(
        "--validation-size",
        type=float,
        default=0.15,
        help="Fraction for validation (0.0–1.0)",
    )
    parser.add_argument(
        "--preprocessing-num-processes",
        type=int,
        # os.cpu_count() may return None when the count cannot be determined;
        # fall back to 1 so the default is still computable.
        default=max(1, (os.cpu_count() or 1) // 2),
        help="CPU processes for data prep",
    )
    parser.add_argument(
        "--split-probabilities",
        type=float,
        nargs=3,
        metavar=("train", "val", "test"),
        action=SplitProbAction,
        default=[0.7, 0.1, 0.2],
        help=(
            "Random split proportions (e.g., 0.7 0.1 0.2). "
            "Only used if no split column."
        ),
    )
    parser.add_argument(
        "--random-seed",
        type=int,
        default=42,
        help="Random seed used for dataset splitting (default: 42)",
    )
    parser.add_argument(
        "--learning-rate",
        type=parse_learning_rate,
        default=None,
        help="Learning rate. If not provided, Ludwig will auto-select it.",
    )
    parser.add_argument(
        "--augmentation",
        type=str,
        default=None,
        help=(
            "Comma-separated list (in order) of any of: "
            "random_horizontal_flip, random_vertical_flip, random_rotate, "
            "random_blur, random_brightness, random_contrast. "
            "E.g. --augmentation random_horizontal_flip,random_rotate"
        ),
    )
    parser.add_argument(
        "--image-resize",
        type=str,
        choices=[
            "original", "96x96", "128x128", "160x160", "192x192", "220x220",
            "224x224", "256x256", "299x299", "320x320", "384x384", "448x448", "512x512"
        ],
        default="original",
        help="Image resize option. 'original' keeps images as-is, other options resize to specified dimensions.",
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=None,
        help=(
            "Decision threshold for binary classification (0.0–1.0). "
            "Overrides default 0.5."
        ),
    )

    args = parser.parse_args()

    # Validate everything up front so bad input fails with a usage error
    # before the backend and orchestrator are constructed.
    if not 0.0 <= args.validation_size <= 1.0:
        parser.error("validation-size must be between 0.0 and 1.0")
    if args.threshold is not None and not 0.0 <= args.threshold <= 1.0:
        parser.error("threshold must be between 0.0 and 1.0")
    if not args.csv_file.is_file():
        parser.error(f"CSV not found: {args.csv_file}")
    if not (args.image_zip.is_file() or args.image_zip.is_dir()):
        parser.error(f"ZIP or directory not found: {args.image_zip}")
    if args.augmentation is not None:
        try:
            # Replace the raw comma-separated string with the parsed form.
            args.augmentation = aug_parse(args.augmentation)
        except ValueError as e:
            parser.error(str(e))

    backend_instance = LudwigDirectBackend()
    orchestrator = ImageLearnerCLI(args, backend_instance)

    exit_code = 0
    try:
        orchestrator.run()
        logger.info("Main script finished successfully.")
    except Exception as e:
        # logger.exception records the traceback together with the message.
        logger.exception("Main script failed: %s", e)
        exit_code = 1
    # Exit outside of a ``finally`` clause: calling sys.exit() there would
    # replace an in-flight KeyboardInterrupt/SystemExit with exit status 0,
    # making an interrupted run look successful.
    sys.exit(exit_code)
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
2036 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
2037 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
2038 if __name__ == "__main__": |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
2039 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
2040 import ludwig |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
2041 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
2042 logger.debug(f"Found Ludwig version: {ludwig.globals.LUDWIG_VERSION}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
2043 except ImportError: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
2044 logger.error( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
2045 "Ludwig library not found. Please ensure Ludwig is installed " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
2046 "('pip install ludwig[image]')" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
2047 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
2048 sys.exit(1) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
2049 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
2050 main() |