Mercurial > repos > goeckslab > image_learner
annotate image_learner_cli.py @ 3:2c3a3dfaf1a9 draft default tip
planemo upload for repository https://github.com/goeckslab/gleam.git commit 5ab02b6688c9af85525dd225134db8f2bbed76ed
author | goeckslab |
---|---|
date | Fri, 04 Jul 2025 03:44:56 +0000 (13 hours ago) |
parents | 186424a7eca7 |
children |
rev | line source |
---|---|
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1 import argparse |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
2 import json |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
3 import logging |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
4 import os |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
5 import shutil |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
6 import sys |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
7 import tempfile |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
8 import zipfile |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
9 from pathlib import Path |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
10 from typing import Any, Dict, Optional, Protocol, Tuple |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
11 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
12 import pandas as pd |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
13 import pandas.api.types as ptypes |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
14 import yaml |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
15 from constants import ( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
16 IMAGE_PATH_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
17 LABEL_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
18 METRIC_DISPLAY_NAMES, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
19 MODEL_ENCODER_TEMPLATES, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
20 SPLIT_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
21 TEMP_CONFIG_FILENAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
22 TEMP_CSV_FILENAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
23 TEMP_DIR_PREFIX |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
24 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
25 from ludwig.globals import ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
26 DESCRIPTION_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
27 PREDICTIONS_PARQUET_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
28 TEST_STATISTICS_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
29 TRAIN_SET_METADATA_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
30 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
31 from ludwig.utils.data_utils import get_split_path |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
32 from ludwig.visualize import get_visualizations_registry |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
33 from sklearn.model_selection import train_test_split |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
34 from utils import ( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
35 build_tabbed_html, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
36 encode_image_to_base64, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
37 get_html_closing, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
38 get_html_template, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
39 get_metrics_help_modal |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
40 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
41 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
42 # --- Logging Setup --- |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
43 logging.basicConfig( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
44 level=logging.INFO, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
45 format='%(asctime)s %(levelname)s %(name)s: %(message)s', |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
46 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
47 logger = logging.getLogger("ImageLearner") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
48 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
49 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
50 def format_config_table_html( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
51 config: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
52 split_info: Optional[str] = None, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
53 training_progress: dict = None, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
54 ) -> str: |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
55 display_keys = [ |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
56 "task_type", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
57 "model_name", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
58 "epochs", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
59 "batch_size", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
60 "fine_tune", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
61 "use_pretrained", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
62 "learning_rate", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
63 "random_seed", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
64 "early_stop", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
65 ] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
66 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
67 rows = [] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
68 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
69 for key in display_keys: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
70 val = config.get(key, "N/A") |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
71 if key == "task_type": |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
72 val = val.title() if isinstance(val, str) else val |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
73 if key == "batch_size": |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
74 if val is not None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
75 val = int(val) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
76 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
77 if training_progress: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
78 val = "Auto-selected batch size by Ludwig:<br>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
79 resolved_val = training_progress.get("batch_size") |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
80 val += f"<span style='font-size: 0.85em;'>{resolved_val}</span><br>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
81 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
82 val = "auto" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
83 if key == "learning_rate": |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
84 resolved_val = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
85 if val is None or val == "auto": |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
86 if training_progress: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
87 resolved_val = training_progress.get("learning_rate") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
88 val = ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
89 "Auto-selected learning rate by Ludwig:<br>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
90 f"<span style='font-size: 0.85em;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
91 f"{resolved_val if resolved_val else val}</span><br>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
92 "<span style='font-size: 0.85em;'>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
93 "Based on model architecture and training setup " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
94 "(e.g., fine-tuning).<br>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
95 "See <a href='https://ludwig.ai/latest/configuration/trainer/" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
96 "#trainer-parameters' target='_blank'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
97 "Ludwig Trainer Parameters</a> for details." |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
98 "</span>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
99 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
100 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
101 val = ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
102 "Auto-selected by Ludwig<br>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
103 "<span style='font-size: 0.85em;'>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
104 "Automatically tuned based on architecture and dataset.<br>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
105 "See <a href='https://ludwig.ai/latest/configuration/trainer/" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
106 "#trainer-parameters' target='_blank'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
107 "Ludwig Trainer Parameters</a> for details." |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
108 "</span>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
109 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
110 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
111 val = f"{val:.6f}" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
112 if key == "epochs": |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
113 if ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
114 training_progress |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
115 and "epoch" in training_progress |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
116 and val > training_progress["epoch"] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
117 ): |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
118 val = ( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
119 f"Because of early stopping: the training " |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
120 f"stopped at epoch {training_progress['epoch']}" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
121 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
122 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
123 if val is None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
124 continue |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
125 rows.append( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
126 f"<tr>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
127 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left;'>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
128 f"{key.replace('_', ' ').title()}</td>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
129 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
130 f"{val}</td>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
131 f"</tr>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
132 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
133 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
134 aug_cfg = config.get("augmentation") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
135 if aug_cfg: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
136 types = [str(a.get("type", "")) for a in aug_cfg] |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
137 aug_val = ", ".join(types) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
138 rows.append( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
139 "<tr>" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
140 "<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left;'>Augmentation</td>" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
141 "<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center;'>" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
142 f"{aug_val}</td>" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
143 "</tr>" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
144 ) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
145 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
146 if split_info: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
147 rows.append( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
148 f"<tr>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
149 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
150 f"Data Split</td>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
151 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
152 f"{split_info}</td>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
153 f"</tr>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
154 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
155 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
156 return ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
157 "<h2 style='text-align: center;'>Training Setup</h2>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
158 "<div style='display: flex; justify-content: center;'>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
159 "<table style='border-collapse: collapse; width: 60%; table-layout: auto;'>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
160 "<thead><tr>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
161 "<th style='padding: 10px; border: 1px solid #ccc; text-align: left;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
162 "Parameter</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
163 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
164 "Value</th>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
165 "</tr></thead><tbody>" + "".join(rows) + "</tbody></table></div><br>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
166 "<p style='text-align: center; font-size: 0.9em;'>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
167 "Model trained using Ludwig.<br>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
168 "If want to learn more about Ludwig default settings," |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
169 "please check their <a href='https://ludwig.ai' target='_blank'>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
170 "website(ludwig.ai)</a>." |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
171 "</p><hr>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
172 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
173 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
174 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
175 def detect_output_type(test_stats): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
176 """Detects if the output type is 'binary' or 'category' based on test statistics.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
177 label_stats = test_stats.get("label", {}) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
178 if "mean_squared_error" in label_stats: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
179 return "regression" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
180 per_class = label_stats.get("per_class_stats", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
181 if len(per_class) == 2: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
182 return "binary" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
183 return "category" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
184 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
185 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
186 def extract_metrics_from_json( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
187 train_stats: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
188 test_stats: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
189 output_type: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
190 ) -> dict: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
191 """Extracts relevant metrics from training and test statistics based on the output type.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
192 metrics = {"training": {}, "validation": {}, "test": {}} |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
193 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
194 def get_last_value(stats, key): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
195 val = stats.get(key) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
196 if isinstance(val, list) and val: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
197 return val[-1] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
198 elif isinstance(val, (int, float)): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
199 return val |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
200 return None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
201 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
202 for split in ["training", "validation"]: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
203 split_stats = train_stats.get(split, {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
204 if not split_stats: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
205 logging.warning(f"No statistics found for {split} split") |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
206 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
207 label_stats = split_stats.get("label", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
208 if not label_stats: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
209 logging.warning(f"No label statistics found for {split} split") |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
210 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
211 if output_type == "binary": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
212 metrics[split] = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
213 "accuracy": get_last_value(label_stats, "accuracy"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
214 "loss": get_last_value(label_stats, "loss"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
215 "precision": get_last_value(label_stats, "precision"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
216 "recall": get_last_value(label_stats, "recall"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
217 "specificity": get_last_value(label_stats, "specificity"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
218 "roc_auc": get_last_value(label_stats, "roc_auc"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
219 } |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
220 elif output_type == "regression": |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
221 metrics[split] = { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
222 "loss": get_last_value(label_stats, "loss"), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
223 "mean_absolute_error": get_last_value( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
224 label_stats, "mean_absolute_error" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
225 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
226 "mean_absolute_percentage_error": get_last_value( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
227 label_stats, "mean_absolute_percentage_error" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
228 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
229 "mean_squared_error": get_last_value(label_stats, "mean_squared_error"), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
230 "root_mean_squared_error": get_last_value( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
231 label_stats, "root_mean_squared_error" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
232 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
233 "root_mean_squared_percentage_error": get_last_value( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
234 label_stats, "root_mean_squared_percentage_error" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
235 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
236 "r2": get_last_value(label_stats, "r2"), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
237 } |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
238 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
239 metrics[split] = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
240 "accuracy": get_last_value(label_stats, "accuracy"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
241 "accuracy_micro": get_last_value(label_stats, "accuracy_micro"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
242 "loss": get_last_value(label_stats, "loss"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
243 "roc_auc": get_last_value(label_stats, "roc_auc"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
244 "hits_at_k": get_last_value(label_stats, "hits_at_k"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
245 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
246 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
247 # Test metrics: dynamic extraction according to exclusions |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
248 test_label_stats = test_stats.get("label", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
249 if not test_label_stats: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
250 logging.warning("No label statistics found for test split") |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
251 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
252 combined_stats = test_stats.get("combined", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
253 overall_stats = test_label_stats.get("overall_stats", {}) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
254 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
255 # Define exclusions |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
256 if output_type == "binary": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
257 exclude = {"per_class_stats", "precision_recall_curve", "roc_curve"} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
258 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
259 exclude = {"per_class_stats", "confusion_matrix"} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
260 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
261 # 1. Get all scalar test_label_stats not excluded |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
262 test_metrics = {} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
263 for k, v in test_label_stats.items(): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
264 if k in exclude: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
265 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
266 if k == "overall_stats": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
267 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
268 if isinstance(v, (int, float, str, bool)): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
269 test_metrics[k] = v |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
270 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
271 # 2. Add overall_stats (flattened) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
272 for k, v in overall_stats.items(): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
273 test_metrics[k] = v |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
274 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
275 # 3. Optionally include combined/loss if present and not already |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
276 if "loss" in combined_stats and "loss" not in test_metrics: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
277 test_metrics["loss"] = combined_stats["loss"] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
278 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
279 metrics["test"] = test_metrics |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
280 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
281 return metrics |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
282 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
283 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
284 def generate_table_row(cells, styles): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
285 """Helper function to generate an HTML table row.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
286 return ( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
287 "<tr>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
288 + "".join(f"<td style='{styles}'>{cell}</td>" for cell in cells) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
289 + "</tr>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
290 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
291 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
292 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
293 def format_stats_table_html(train_stats: dict, test_stats: dict) -> str: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
294 """Formats a combined HTML table for training, validation, and test metrics.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
295 output_type = detect_output_type(test_stats) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
296 all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
297 rows = [] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
298 for metric_key in sorted(all_metrics["training"].keys()): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
299 if ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
300 metric_key in all_metrics["validation"] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
301 and metric_key in all_metrics["test"] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
302 ): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
303 display_name = METRIC_DISPLAY_NAMES.get( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
304 metric_key, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
305 metric_key.replace("_", " ").title(), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
306 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
307 t = all_metrics["training"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
308 v = all_metrics["validation"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
309 te = all_metrics["test"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
310 if all(x is not None for x in [t, v, te]): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
311 rows.append([display_name, f"{t:.4f}", f"{v:.4f}", f"{te:.4f}"]) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
312 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
313 if not rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
314 return "<table><tr><td>No metric values found.</td></tr></table>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
315 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
316 html = ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
317 "<h2 style='text-align: center;'>Model Performance Summary</h2>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
318 "<div style='display: flex; justify-content: center;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
319 "<table style='border-collapse: collapse; table-layout: auto;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
320 "<thead><tr>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
321 "<th style='padding: 10px; border: 1px solid #ccc; text-align: left; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
322 "white-space: nowrap;'>Metric</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
323 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
324 "white-space: nowrap;'>Train</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
325 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
326 "white-space: nowrap;'>Validation</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
327 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
328 "white-space: nowrap;'>Test</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
329 "</tr></thead><tbody>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
330 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
331 for row in rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
332 html += generate_table_row( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
333 row, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
334 "padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
335 "white-space: nowrap;", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
336 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
337 html += "</tbody></table></div><br>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
338 return html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
339 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
340 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
341 def format_train_val_stats_table_html(train_stats: dict, test_stats: dict) -> str: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
342 """Formats an HTML table for training and validation metrics.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
343 output_type = detect_output_type(test_stats) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
344 all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
345 rows = [] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
346 for metric_key in sorted(all_metrics["training"].keys()): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
347 if metric_key in all_metrics["validation"]: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
348 display_name = METRIC_DISPLAY_NAMES.get( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
349 metric_key, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
350 metric_key.replace("_", " ").title(), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
351 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
352 t = all_metrics["training"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
353 v = all_metrics["validation"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
354 if t is not None and v is not None: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
355 rows.append([display_name, f"{t:.4f}", f"{v:.4f}"]) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
356 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
357 if not rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
358 return "<table><tr><td>No metric values found for Train/Validation.</td></tr></table>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
359 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
360 html = ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
361 "<h2 style='text-align: center;'>Train/Validation Performance Summary</h2>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
362 "<div style='display: flex; justify-content: center;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
363 "<table style='border-collapse: collapse; table-layout: auto;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
364 "<thead><tr>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
365 "<th style='padding: 10px; border: 1px solid #ccc; text-align: left; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
366 "white-space: nowrap;'>Metric</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
367 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
368 "white-space: nowrap;'>Train</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
369 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
370 "white-space: nowrap;'>Validation</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
371 "</tr></thead><tbody>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
372 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
373 for row in rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
374 html += generate_table_row( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
375 row, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
376 "padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
377 "white-space: nowrap;", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
378 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
379 html += "</tbody></table></div><br>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
380 return html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
381 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
382 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
383 def format_test_merged_stats_table_html( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
384 test_metrics: Dict[str, Optional[float]], |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
385 ) -> str: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
386 """Formats an HTML table for test metrics.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
387 rows = [] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
388 for key in sorted(test_metrics.keys()): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
389 display_name = METRIC_DISPLAY_NAMES.get(key, key.replace("_", " ").title()) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
390 value = test_metrics[key] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
391 if value is not None: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
392 rows.append([display_name, f"{value:.4f}"]) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
393 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
394 if not rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
395 return "<table><tr><td>No test metric values found.</td></tr></table>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
396 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
397 html = ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
398 "<h2 style='text-align: center;'>Test Performance Summary</h2>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
399 "<div style='display: flex; justify-content: center;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
400 "<table style='border-collapse: collapse; table-layout: auto;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
401 "<thead><tr>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
402 "<th style='padding: 10px; border: 1px solid #ccc; text-align: left; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
403 "white-space: nowrap;'>Metric</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
404 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
405 "white-space: nowrap;'>Test</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
406 "</tr></thead><tbody>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
407 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
408 for row in rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
409 html += generate_table_row( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
410 row, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
411 "padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
412 "white-space: nowrap;", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
413 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
414 html += "</tbody></table></div><br>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
415 return html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
416 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
417 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
418 def split_data_0_2( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
419 df: pd.DataFrame, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
420 split_column: str, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
421 validation_size: float = 0.15, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
422 random_state: int = 42, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
423 label_column: Optional[str] = None, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
424 ) -> pd.DataFrame: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
425 """Given a DataFrame whose split_column only contains {0,2}, re-assign a portion of the 0s to become 1s (validation).""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
426 out = df.copy() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
427 out[split_column] = pd.to_numeric(out[split_column], errors="coerce").astype(int) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
428 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
429 idx_train = out.index[out[split_column] == 0].tolist() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
430 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
431 if not idx_train: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
432 logger.info("No rows with split=0; nothing to do.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
433 return out |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
434 stratify_arr = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
435 if label_column and label_column in out.columns: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
436 label_counts = out.loc[idx_train, label_column].value_counts() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
437 if label_counts.size > 1 and (label_counts.min() * validation_size) >= 1: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
438 stratify_arr = out.loc[idx_train, label_column] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
439 else: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
440 logger.warning( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
441 "Cannot stratify (too few labels); splitting without stratify." |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
442 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
443 if validation_size <= 0: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
444 logger.info("validation_size <= 0; keeping all as train.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
445 return out |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
446 if validation_size >= 1: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
447 logger.info("validation_size >= 1; moving all train → validation.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
448 out.loc[idx_train, split_column] = 1 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
449 return out |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
450 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
451 train_idx, val_idx = train_test_split( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
452 idx_train, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
453 test_size=validation_size, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
454 random_state=random_state, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
455 stratify=stratify_arr, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
456 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
457 except ValueError as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
458 logger.warning(f"Stratified split failed ({e}); retrying without stratify.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
459 train_idx, val_idx = train_test_split( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
460 idx_train, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
461 test_size=validation_size, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
462 random_state=random_state, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
463 stratify=None, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
464 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
465 out.loc[train_idx, split_column] = 0 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
466 out.loc[val_idx, split_column] = 1 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
467 out[split_column] = out[split_column].astype(int) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
468 return out |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
469 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
470 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
471 class Backend(Protocol): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
472 """Interface for a machine learning backend.""" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
473 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
474 def prepare_config( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
475 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
476 config_params: Dict[str, Any], |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
477 split_config: Dict[str, Any], |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
478 ) -> str: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
479 ... |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
480 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
481 def run_experiment( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
482 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
483 dataset_path: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
484 config_path: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
485 output_dir: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
486 random_seed: int, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
487 ) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
488 ... |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
489 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
490 def generate_plots(self, output_dir: Path) -> None: |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
491 ... |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
492 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
493 def generate_html_report( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
494 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
495 title: str, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
496 output_dir: str, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
497 config: Dict[str, Any], |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
498 split_info: str, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
499 ) -> Path: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
500 ... |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
501 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
502 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
503 class LudwigDirectBackend: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
504 """Backend for running Ludwig experiments directly via the internal experiment_cli function.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
505 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
506 def prepare_config( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
507 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
508 config_params: Dict[str, Any], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
509 split_config: Dict[str, Any], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
510 ) -> str: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
511 logger.info("LudwigDirectBackend: Preparing YAML configuration.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
512 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
513 model_name = config_params.get("model_name", "resnet18") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
514 use_pretrained = config_params.get("use_pretrained", False) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
515 fine_tune = config_params.get("fine_tune", False) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
516 if use_pretrained: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
517 trainable = bool(fine_tune) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
518 else: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
519 trainable = True |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
520 epochs = config_params.get("epochs", 10) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
521 batch_size = config_params.get("batch_size") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
522 num_processes = config_params.get("preprocessing_num_processes", 1) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
523 early_stop = config_params.get("early_stop", None) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
524 learning_rate = config_params.get("learning_rate") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
525 learning_rate = "auto" if learning_rate is None else float(learning_rate) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
526 raw_encoder = MODEL_ENCODER_TEMPLATES.get(model_name, model_name) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
527 if isinstance(raw_encoder, dict): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
528 encoder_config = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
529 **raw_encoder, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
530 "use_pretrained": use_pretrained, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
531 "trainable": trainable, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
532 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
533 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
534 encoder_config = {"type": raw_encoder} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
535 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
536 batch_size_cfg = batch_size or "auto" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
537 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
538 label_column_path = config_params.get("label_column_data_path") |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
539 label_series = None |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
540 if label_column_path is not None and Path(label_column_path).exists(): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
541 try: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
542 label_series = pd.read_csv(label_column_path)[LABEL_COLUMN_NAME] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
543 except Exception as e: |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
544 logger.warning(f"Could not read label column for task detection: {e}") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
545 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
546 if ( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
547 label_series is not None |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
548 and ptypes.is_numeric_dtype(label_series.dtype) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
549 and label_series.nunique() > 10 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
550 ): |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
551 task_type = "regression" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
552 else: |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
553 task_type = "classification" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
554 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
555 config_params["task_type"] = task_type |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
556 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
557 image_feat: Dict[str, Any] = { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
558 "name": IMAGE_PATH_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
559 "type": "image", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
560 "encoder": encoder_config, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
561 } |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
562 if config_params.get("augmentation") is not None: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
563 image_feat["augmentation"] = config_params["augmentation"] |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
564 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
565 if task_type == "regression": |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
566 output_feat = { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
567 "name": LABEL_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
568 "type": "number", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
569 "decoder": {"type": "regressor"}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
570 "loss": {"type": "mean_squared_error"}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
571 "evaluation": { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
572 "metrics": [ |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
573 "mean_squared_error", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
574 "mean_absolute_error", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
575 "r2", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
576 ] |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
577 }, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
578 } |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
579 val_metric = config_params.get("validation_metric", "mean_squared_error") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
580 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
581 else: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
582 num_unique_labels = ( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
583 label_series.nunique() if label_series is not None else 2 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
584 ) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
585 output_type = "binary" if num_unique_labels == 2 else "category" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
586 output_feat = {"name": LABEL_COLUMN_NAME, "type": output_type} |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
587 val_metric = None |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
588 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
589 conf: Dict[str, Any] = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
590 "model_type": "ecd", |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
591 "input_features": [image_feat], |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
592 "output_features": [output_feat], |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
593 "combiner": {"type": "concat"}, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
594 "trainer": { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
595 "epochs": epochs, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
596 "early_stop": early_stop, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
597 "batch_size": batch_size_cfg, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
598 "learning_rate": learning_rate, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
599 # only set validation_metric for regression |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
600 **({"validation_metric": val_metric} if val_metric else {}), |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
601 }, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
602 "preprocessing": { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
603 "split": split_config, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
604 "num_processes": num_processes, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
605 "in_memory": False, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
606 }, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
607 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
608 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
609 logger.debug("LudwigDirectBackend: Config dict built.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
610 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
611 yaml_str = yaml.dump(conf, sort_keys=False, indent=2) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
612 logger.info("LudwigDirectBackend: YAML config generated.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
613 return yaml_str |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
614 except Exception: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
615 logger.error( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
616 "LudwigDirectBackend: Failed to serialize YAML.", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
617 exc_info=True, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
618 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
619 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
620 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
621 def run_experiment( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
622 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
623 dataset_path: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
624 config_path: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
625 output_dir: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
626 random_seed: int = 42, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
627 ) -> None: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
628 """Invoke Ludwig's internal experiment_cli function to run the experiment.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
629 logger.info("LudwigDirectBackend: Starting experiment execution.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
630 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
631 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
632 from ludwig.experiment import experiment_cli |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
633 except ImportError as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
634 logger.error( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
635 "LudwigDirectBackend: Could not import experiment_cli.", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
636 exc_info=True, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
637 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
638 raise RuntimeError("Ludwig import failed.") from e |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
639 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
640 output_dir.mkdir(parents=True, exist_ok=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
641 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
642 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
643 experiment_cli( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
644 dataset=str(dataset_path), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
645 config=str(config_path), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
646 output_directory=str(output_dir), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
647 random_seed=random_seed, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
648 ) |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
649 logger.info( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
650 f"LudwigDirectBackend: Experiment completed. Results in {output_dir}" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
651 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
652 except TypeError as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
653 logger.error( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
654 "LudwigDirectBackend: Argument mismatch in experiment_cli call.", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
655 exc_info=True, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
656 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
657 raise RuntimeError("Ludwig argument error.") from e |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
658 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
659 logger.error( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
660 "LudwigDirectBackend: Experiment execution error.", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
661 exc_info=True, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
662 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
663 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
664 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
665 def get_training_process(self, output_dir) -> Optional[Dict[str, Any]]: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
666 """Retrieve the learning rate used in the most recent Ludwig run.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
667 output_dir = Path(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
668 exp_dirs = sorted( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
669 output_dir.glob("experiment_run*"), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
670 key=lambda p: p.stat().st_mtime, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
671 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
672 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
673 if not exp_dirs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
674 logger.warning(f"No experiment run directories found in {output_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
675 return None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
676 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
677 progress_file = exp_dirs[-1] / "model" / "training_progress.json" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
678 if not progress_file.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
679 logger.warning(f"No training_progress.json found in {progress_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
680 return None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
681 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
682 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
683 with progress_file.open("r", encoding="utf-8") as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
684 data = json.load(f) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
685 return { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
686 "learning_rate": data.get("learning_rate"), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
687 "batch_size": data.get("batch_size"), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
688 "epoch": data.get("epoch"), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
689 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
690 except Exception as e: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
691 logger.warning(f"Failed to read training progress info: {e}") |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
692 return {} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
693 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
694 def convert_parquet_to_csv(self, output_dir: Path): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
695 """Convert the predictions Parquet file to CSV.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
696 output_dir = Path(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
697 exp_dirs = sorted( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
698 output_dir.glob("experiment_run*"), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
699 key=lambda p: p.stat().st_mtime, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
700 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
701 if not exp_dirs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
702 logger.warning(f"No experiment run dirs found in {output_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
703 return |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
704 exp_dir = exp_dirs[-1] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
705 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
706 csv_path = exp_dir / "predictions.csv" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
707 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
708 df = pd.read_parquet(parquet_path) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
709 df.to_csv(csv_path, index=False) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
710 logger.info(f"Converted Parquet to CSV: {csv_path}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
711 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
712 logger.error(f"Error converting Parquet to CSV: {e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
713 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
714 def generate_plots(self, output_dir: Path) -> None: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
715 """Generate all registered Ludwig visualizations for the latest experiment run.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
716 logger.info("Generating all Ludwig visualizations…") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
717 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
718 test_plots = { |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
719 "compare_performance", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
720 "compare_classifiers_performance_from_prob", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
721 "compare_classifiers_performance_from_pred", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
722 "compare_classifiers_performance_changing_k", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
723 "compare_classifiers_multiclass_multimetric", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
724 "compare_classifiers_predictions", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
725 "confidence_thresholding_2thresholds_2d", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
726 "confidence_thresholding_2thresholds_3d", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
727 "confidence_thresholding", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
728 "confidence_thresholding_data_vs_acc", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
729 "binary_threshold_vs_metric", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
730 "roc_curves", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
731 "roc_curves_from_test_statistics", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
732 "calibration_1_vs_all", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
733 "calibration_multiclass", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
734 "confusion_matrix", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
735 "frequency_vs_f1", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
736 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
737 train_plots = { |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
738 "learning_curves", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
739 "compare_classifiers_performance_subset", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
740 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
741 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
742 output_dir = Path(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
743 exp_dirs = sorted( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
744 output_dir.glob("experiment_run*"), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
745 key=lambda p: p.stat().st_mtime, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
746 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
747 if not exp_dirs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
748 logger.warning(f"No experiment run dirs found in {output_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
749 return |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
750 exp_dir = exp_dirs[-1] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
751 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
752 viz_dir = exp_dir / "visualizations" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
753 viz_dir.mkdir(exist_ok=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
754 train_viz = viz_dir / "train" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
755 test_viz = viz_dir / "test" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
756 train_viz.mkdir(parents=True, exist_ok=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
757 test_viz.mkdir(parents=True, exist_ok=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
758 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
759 def _check(p: Path) -> Optional[str]: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
760 return str(p) if p.exists() else None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
761 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
762 training_stats = _check(exp_dir / "training_statistics.json") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
763 test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
764 probs_path = _check(exp_dir / PREDICTIONS_PARQUET_FILE_NAME) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
765 gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
766 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
767 dataset_path = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
768 split_file = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
769 desc = exp_dir / DESCRIPTION_FILE_NAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
770 if desc.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
771 with open(desc, "r") as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
772 cfg = json.load(f) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
773 dataset_path = _check(Path(cfg.get("dataset", ""))) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
774 split_file = _check(Path(get_split_path(cfg.get("dataset", "")))) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
775 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
776 output_feature = "" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
777 if desc.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
778 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
779 output_feature = cfg["config"]["output_features"][0]["name"] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
780 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
781 pass |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
782 if not output_feature and test_stats: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
783 with open(test_stats, "r") as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
784 stats = json.load(f) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
785 output_feature = next(iter(stats.keys()), "") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
786 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
787 viz_registry = get_visualizations_registry() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
788 for viz_name, viz_func in viz_registry.items(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
789 if viz_name in train_plots: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
790 viz_dir_plot = train_viz |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
791 elif viz_name in test_plots: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
792 viz_dir_plot = test_viz |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
793 else: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
794 continue |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
795 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
796 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
797 viz_func( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
798 training_statistics=[training_stats] if training_stats else [], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
799 test_statistics=[test_stats] if test_stats else [], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
800 probabilities=[probs_path] if probs_path else [], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
801 output_feature_name=output_feature, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
802 ground_truth_split=2, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
803 top_n_classes=[0], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
804 top_k=3, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
805 ground_truth_metadata=gt_metadata, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
806 ground_truth=dataset_path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
807 split_file=split_file, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
808 output_directory=str(viz_dir_plot), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
809 normalize=False, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
810 file_format="png", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
811 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
812 logger.info(f"✔ Generated {viz_name}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
813 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
814 logger.warning(f"✘ Skipped {viz_name}: {e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
815 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
816 logger.info(f"All visualizations written to {viz_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
817 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
818 def generate_html_report( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
819 self, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
820 title: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
821 output_dir: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
822 config: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
823 split_info: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
824 ) -> Path: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
825 """Assemble an HTML report from visualizations under train_val/ and test/ folders.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
826 cwd = Path.cwd() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
827 report_name = title.lower().replace(" ", "_") + "_report.html" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
828 report_path = cwd / report_name |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
829 output_dir = Path(output_dir) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
830 output_type = None |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
831 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
832 exp_dirs = sorted( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
833 output_dir.glob("experiment_run*"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
834 key=lambda p: p.stat().st_mtime, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
835 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
836 if not exp_dirs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
837 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
838 exp_dir = exp_dirs[-1] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
839 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
840 base_viz_dir = exp_dir / "visualizations" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
841 train_viz_dir = base_viz_dir / "train" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
842 test_viz_dir = base_viz_dir / "test" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
843 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
844 html = get_html_template() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
845 html += f"<h1>{title}</h1>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
846 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
847 metrics_html = "" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
848 train_val_metrics_html = "" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
849 test_metrics_html = "" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
850 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
851 train_stats_path = exp_dir / "training_statistics.json" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
852 test_stats_path = exp_dir / TEST_STATISTICS_FILE_NAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
853 if train_stats_path.exists() and test_stats_path.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
854 with open(train_stats_path) as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
855 train_stats = json.load(f) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
856 with open(test_stats_path) as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
857 test_stats = json.load(f) |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
858 output_type = detect_output_type(test_stats) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
859 metrics_html = format_stats_table_html(train_stats, test_stats) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
860 train_val_metrics_html = format_train_val_stats_table_html( |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
861 train_stats, test_stats |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
862 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
863 test_metrics_html = format_test_merged_stats_table_html( |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
864 extract_metrics_from_json(train_stats, test_stats, output_type)[ |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
865 "test" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
866 ] |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
867 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
868 except Exception as e: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
869 logger.warning( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
870 f"Could not load stats for HTML report: {type(e).__name__}: {e}" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
871 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
872 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
873 config_html = "" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
874 training_progress = self.get_training_process(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
875 try: |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
876 config_html = format_config_table_html( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
877 config, split_info, training_progress |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
878 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
879 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
880 logger.warning(f"Could not load config for HTML report: {e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
881 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
882 def render_img_section( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
883 title: str, dir_path: Path, output_type: str = None |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
884 ) -> str: |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
885 if not dir_path.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
886 return f"<h2>{title}</h2><p><em>Directory not found.</em></p>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
887 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
888 imgs = list(dir_path.glob("*.png")) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
889 if not imgs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
890 return f"<h2>{title}</h2><p><em>No plots found.</em></p>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
891 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
892 if title == "Test Visualizations" and output_type == "binary": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
893 order = [ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
894 "confusion_matrix__label_top2.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
895 "roc_curves_from_prediction_statistics.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
896 "compare_performance_label.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
897 "confusion_matrix_entropy__label_top2.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
898 ] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
899 img_names = {img.name: img for img in imgs} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
900 ordered_imgs = [ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
901 img_names[fname] for fname in order if fname in img_names |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
902 ] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
903 remaining = sorted( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
904 [ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
905 img |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
906 for img in imgs |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
907 if img.name not in order and img.name != "roc_curves.png" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
908 ] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
909 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
910 imgs = ordered_imgs + remaining |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
911 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
912 elif title == "Test Visualizations" and output_type == "category": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
913 unwanted = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
914 "compare_classifiers_multiclass_multimetric__label_best10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
915 "compare_classifiers_multiclass_multimetric__label_top10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
916 "compare_classifiers_multiclass_multimetric__label_worst10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
917 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
918 display_order = [ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
919 "confusion_matrix__label_top10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
920 "roc_curves.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
921 "compare_performance_label.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
922 "compare_classifiers_performance_from_prob.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
923 "compare_classifiers_multiclass_multimetric__label_sorted.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
924 "confusion_matrix_entropy__label_top10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
925 ] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
926 img_names = {img.name: img for img in imgs if img.name not in unwanted} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
927 ordered_imgs = [ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
928 img_names[fname] for fname in display_order if fname in img_names |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
929 ] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
930 remaining = sorted( |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
931 [img for img in img_names.values() if img.name not in display_order] |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
932 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
933 imgs = ordered_imgs + remaining |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
934 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
935 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
936 if output_type == "category": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
937 unwanted = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
938 "compare_classifiers_multiclass_multimetric__label_best10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
939 "compare_classifiers_multiclass_multimetric__label_top10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
940 "compare_classifiers_multiclass_multimetric__label_worst10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
941 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
942 imgs = sorted([img for img in imgs if img.name not in unwanted]) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
943 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
944 imgs = sorted(imgs) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
945 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
946 section_html = f"<h2 style='text-align: center;'>{title}</h2><div>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
947 for img in imgs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
948 b64 = encode_image_to_base64(str(img)) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
949 section_html += ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
950 f'<div class="plot" style="margin-bottom:20px;text-align:center;">' |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
951 f"<h3>{img.stem.replace('_', ' ').title()}</h3>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
952 f'<img src="data:image/png;base64,{b64}" ' |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
953 f'style="max-width:90%;max-height:600px;border:1px solid #ddd;" />' |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
954 f"</div>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
955 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
956 section_html += "</div>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
957 return section_html |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
958 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
959 tab1_content = config_html + metrics_html |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
960 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
961 tab2_content = train_val_metrics_html + render_img_section( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
962 "Training & Validation Visualizations", train_viz_dir |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
963 ) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
964 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
965 # --- Predictions vs Ground Truth table --- |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
966 preds_section = "" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
967 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
968 if parquet_path.exists(): |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
969 try: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
970 # 1) load predictions from Parquet |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
971 df_preds = pd.read_parquet(parquet_path).reset_index(drop=True) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
972 # assume the column containing your model's prediction is named "prediction" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
973 # or contains that substring: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
974 pred_col = next( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
975 (c for c in df_preds.columns if "prediction" in c.lower()), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
976 None, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
977 ) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
978 if pred_col is None: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
979 raise ValueError("No prediction column found in Parquet output") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
980 df_pred = df_preds[[pred_col]].rename(columns={pred_col: "prediction"}) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
981 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
982 # 2) load ground truth for the test split from prepared CSV |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
983 df_all = pd.read_csv(config["label_column_data_path"]) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
984 df_gt = df_all[df_all[SPLIT_COLUMN_NAME] == 2][ |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
985 LABEL_COLUMN_NAME |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
986 ].reset_index(drop=True) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
987 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
988 # 3) concatenate side‐by‐side |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
989 df_table = pd.concat([df_gt, df_pred], axis=1) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
990 df_table.columns = [LABEL_COLUMN_NAME, "prediction"] |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
991 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
992 # 4) render as HTML |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
993 preds_html = df_table.to_html(index=False, classes="predictions-table") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
994 preds_section = ( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
995 "<h2 style='text-align: center;'>Predictions vs. Ground Truth</h2>" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
996 "<div style='overflow-x:auto; margin-bottom:20px;'>" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
997 + preds_html |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
998 + "</div>" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
999 ) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1000 except Exception as e: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1001 logger.warning(f"Could not build Predictions vs GT table: {e}") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1002 # Test tab = Metrics + Preds table + Visualizations |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1003 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1004 tab3_content = ( |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1005 test_metrics_html |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1006 + preds_section |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1007 + render_img_section("Test Visualizations", test_viz_dir, output_type) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1008 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1009 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1010 # assemble the tabs and help modal |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1011 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1012 modal_html = get_metrics_help_modal() |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1013 html += tabbed_html + modal_html + get_html_closing() |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1014 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1015 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1016 with open(report_path, "w") as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1017 f.write(html) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1018 logger.info(f"HTML report generated at: {report_path}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1019 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1020 logger.error(f"Failed to write HTML report: {e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1021 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1022 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1023 return report_path |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1024 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1025 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1026 class WorkflowOrchestrator: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1027 """Manages the image-classification workflow.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1028 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1029 def __init__(self, args: argparse.Namespace, backend: Backend): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1030 self.args = args |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1031 self.backend = backend |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1032 self.temp_dir: Optional[Path] = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1033 self.image_extract_dir: Optional[Path] = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1034 logger.info(f"Orchestrator initialized with backend: {type(backend).__name__}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1035 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1036 def _create_temp_dirs(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1037 """Create temporary output and image extraction directories.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1038 try: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1039 self.temp_dir = Path( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1040 tempfile.mkdtemp(dir=self.args.output_dir, prefix=TEMP_DIR_PREFIX) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1041 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1042 self.image_extract_dir = self.temp_dir / "images" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1043 self.image_extract_dir.mkdir() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1044 logger.info(f"Created temp directory: {self.temp_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1045 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1046 logger.error("Failed to create temporary directories", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1047 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1048 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1049 def _extract_images(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1050 """Extract images from ZIP into the temp image directory.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1051 if self.image_extract_dir is None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1052 raise RuntimeError("Temp image directory not initialized.") |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1053 logger.info( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1054 f"Extracting images from {self.args.image_zip} → {self.image_extract_dir}" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1055 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1056 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1057 with zipfile.ZipFile(self.args.image_zip, "r") as z: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1058 z.extractall(self.image_extract_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1059 logger.info("Image extraction complete.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1060 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1061 logger.error("Error extracting zip file", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1062 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1063 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1064 def _prepare_data(self) -> Tuple[Path, Dict[str, Any], str]: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1065 """Load CSV, update image paths, handle splits, and write prepared CSV.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1066 if not self.temp_dir or not self.image_extract_dir: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1067 raise RuntimeError("Temp dirs not initialized before data prep.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1068 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1069 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1070 df = pd.read_csv(self.args.csv_file) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1071 logger.info(f"Loaded CSV: {self.args.csv_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1072 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1073 logger.error("Error loading CSV file", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1074 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1075 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1076 required = {IMAGE_PATH_COLUMN_NAME, LABEL_COLUMN_NAME} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1077 missing = required - set(df.columns) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1078 if missing: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1079 raise ValueError(f"Missing CSV columns: {', '.join(missing)}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1080 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1081 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1082 df[IMAGE_PATH_COLUMN_NAME] = df[IMAGE_PATH_COLUMN_NAME].apply( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1083 lambda p: str((self.image_extract_dir / p).resolve()) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1084 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1085 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1086 logger.error("Error updating image paths", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1087 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1088 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1089 if SPLIT_COLUMN_NAME in df.columns: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1090 df, split_config, split_info = self._process_fixed_split(df) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1091 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1092 logger.info("No split column; using random split") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1093 split_config = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1094 "type": "random", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1095 "probabilities": self.args.split_probabilities, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1096 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1097 split_info = ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1098 f"No split column in CSV. Used random split: " |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1099 f"{[int(p * 100) for p in self.args.split_probabilities]}% " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1100 f"for train/val/test." |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1101 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1102 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1103 final_csv = self.temp_dir / TEMP_CSV_FILENAME |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1104 try: |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1105 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1106 df.to_csv(final_csv, index=False) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1107 logger.info(f"Saved prepared data to {final_csv}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1108 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1109 logger.error("Error saving prepared CSV", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1110 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1111 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1112 return final_csv, split_config, split_info |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1113 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1114 def _process_fixed_split( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1115 self, df: pd.DataFrame |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1116 ) -> Tuple[pd.DataFrame, Dict[str, Any], str]: |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1117 """Process a fixed split column (0=train,1=val,2=test).""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1118 logger.info(f"Fixed split column '{SPLIT_COLUMN_NAME}' detected.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1119 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1120 col = df[SPLIT_COLUMN_NAME] |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1121 df[SPLIT_COLUMN_NAME] = pd.to_numeric(col, errors="coerce").astype( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1122 pd.Int64Dtype() |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1123 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1124 if df[SPLIT_COLUMN_NAME].isna().any(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1125 logger.warning("Split column contains non-numeric/missing values.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1126 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1127 unique = set(df[SPLIT_COLUMN_NAME].dropna().unique()) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1128 logger.info(f"Unique split values: {unique}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1129 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1130 if unique == {0, 2}: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1131 df = split_data_0_2( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1132 df, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1133 SPLIT_COLUMN_NAME, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1134 validation_size=self.args.validation_size, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1135 label_column=LABEL_COLUMN_NAME, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1136 random_state=self.args.random_seed, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1137 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1138 split_info = ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1139 "Detected a split column (with values 0 and 2) in the input CSV. " |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1140 f"Used this column as a base and reassigned " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1141 f"{self.args.validation_size * 100:.1f}% " |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1142 "of the training set (originally labeled 0) to validation (labeled 1)." |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1143 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1144 logger.info("Applied custom 0/2 split.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1145 elif unique.issubset({0, 1, 2}): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1146 split_info = "Used user-defined split column from CSV." |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1147 logger.info("Using fixed split as-is.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1148 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1149 raise ValueError(f"Unexpected split values: {unique}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1150 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1151 return df, {"type": "fixed", "column": SPLIT_COLUMN_NAME}, split_info |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1152 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1153 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1154 logger.error("Error processing fixed split", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1155 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1156 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1157 def _cleanup_temp_dirs(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1158 if self.temp_dir and self.temp_dir.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1159 logger.info(f"Cleaning up temp directory: {self.temp_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1160 shutil.rmtree(self.temp_dir, ignore_errors=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1161 self.temp_dir = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1162 self.image_extract_dir = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1163 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1164 def run(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1165 """Execute the full workflow end-to-end.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1166 logger.info("Starting workflow...") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1167 self.args.output_dir.mkdir(parents=True, exist_ok=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1168 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1169 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1170 self._create_temp_dirs() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1171 self._extract_images() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1172 csv_path, split_cfg, split_info = self._prepare_data() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1173 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1174 use_pretrained = self.args.use_pretrained or self.args.fine_tune |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1175 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1176 backend_args = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1177 "model_name": self.args.model_name, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1178 "fine_tune": self.args.fine_tune, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1179 "use_pretrained": use_pretrained, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1180 "epochs": self.args.epochs, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1181 "batch_size": self.args.batch_size, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1182 "preprocessing_num_processes": self.args.preprocessing_num_processes, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1183 "split_probabilities": self.args.split_probabilities, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1184 "learning_rate": self.args.learning_rate, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1185 "random_seed": self.args.random_seed, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1186 "early_stop": self.args.early_stop, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1187 "label_column_data_path": csv_path, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1188 "augmentation": self.args.augmentation, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1189 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1190 yaml_str = self.backend.prepare_config(backend_args, split_cfg) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1191 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1192 config_file = self.temp_dir / TEMP_CONFIG_FILENAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1193 config_file.write_text(yaml_str) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1194 logger.info(f"Wrote backend config: {config_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1195 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1196 self.backend.run_experiment( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1197 csv_path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1198 config_file, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1199 self.args.output_dir, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1200 self.args.random_seed, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1201 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1202 logger.info("Workflow completed successfully.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1203 self.backend.generate_plots(self.args.output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1204 report_file = self.backend.generate_html_report( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1205 "Image Classification Results", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1206 self.args.output_dir, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1207 backend_args, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1208 split_info, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1209 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1210 logger.info(f"HTML report generated at: {report_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1211 self.backend.convert_parquet_to_csv(self.args.output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1212 logger.info("Converted Parquet to CSV.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1213 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1214 logger.error("Workflow execution failed", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1215 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1216 finally: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1217 self._cleanup_temp_dirs() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1218 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1219 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1220 def parse_learning_rate(s): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1221 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1222 return float(s) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1223 except (TypeError, ValueError): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1224 return None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1225 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1226 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1227 def aug_parse(aug_string: str): |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1228 """ |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1229 Parse comma-separated augmentation keys into Ludwig augmentation dicts. |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1230 Raises ValueError on unknown key. |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1231 """ |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1232 mapping = { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1233 "random_horizontal_flip": {"type": "random_horizontal_flip"}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1234 "random_vertical_flip": {"type": "random_vertical_flip"}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1235 "random_rotate": {"type": "random_rotate", "degree": 10}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1236 "random_blur": {"type": "random_blur", "kernel_size": 3}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1237 "random_brightness": {"type": "random_brightness", "min": 0.5, "max": 2.0}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1238 "random_contrast": {"type": "random_contrast", "min": 0.5, "max": 2.0}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1239 } |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1240 aug_list = [] |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1241 for tok in aug_string.split(","): |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1242 key = tok.strip() |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1243 if key not in mapping: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1244 valid = ", ".join(mapping.keys()) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1245 raise ValueError(f"Unknown augmentation '{key}'. Valid choices: {valid}") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1246 aug_list.append(mapping[key]) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1247 return aug_list |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1248 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1249 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1250 class SplitProbAction(argparse.Action): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1251 def __call__(self, parser, namespace, values, option_string=None): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1252 train, val, test = values |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1253 total = train + val + test |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1254 if abs(total - 1.0) > 1e-6: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1255 parser.error( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1256 f"--split-probabilities must sum to 1.0; " |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1257 f"got {train:.3f} + {val:.3f} + {test:.3f} = {total:.3f}" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1258 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1259 setattr(namespace, self.dest, values) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1260 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1261 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1262 def main(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1263 parser = argparse.ArgumentParser( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1264 description="Image Classification Learner with Pluggable Backends", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1265 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1266 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1267 "--csv-file", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1268 required=True, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1269 type=Path, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1270 help="Path to the input CSV", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1271 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1272 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1273 "--image-zip", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1274 required=True, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1275 type=Path, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1276 help="Path to the images ZIP", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1277 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1278 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1279 "--model-name", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1280 required=True, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1281 choices=MODEL_ENCODER_TEMPLATES.keys(), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1282 help="Which model template to use", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1283 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1284 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1285 "--use-pretrained", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1286 action="store_true", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1287 help="Use pretrained weights for the model", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1288 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1289 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1290 "--fine-tune", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1291 action="store_true", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1292 help="Enable fine-tuning", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1293 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1294 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1295 "--epochs", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1296 type=int, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1297 default=10, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1298 help="Number of training epochs", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1299 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1300 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1301 "--early-stop", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1302 type=int, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1303 default=5, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1304 help="Early stopping patience", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1305 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1306 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1307 "--batch-size", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1308 type=int, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1309 help="Batch size (None = auto)", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1310 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1311 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1312 "--output-dir", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1313 type=Path, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1314 default=Path("learner_output"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1315 help="Where to write outputs", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1316 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1317 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1318 "--validation-size", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1319 type=float, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1320 default=0.15, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1321 help="Fraction for validation (0.0–1.0)", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1322 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1323 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1324 "--preprocessing-num-processes", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1325 type=int, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1326 default=max(1, os.cpu_count() // 2), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1327 help="CPU processes for data prep", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1328 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1329 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1330 "--split-probabilities", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1331 type=float, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1332 nargs=3, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1333 metavar=("train", "val", "test"), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1334 action=SplitProbAction, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1335 default=[0.7, 0.1, 0.2], |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1336 help=( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1337 "Random split proportions (e.g., 0.7 0.1 0.2)." |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1338 "Only used if no split column." |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1339 ), |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1340 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1341 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1342 "--random-seed", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1343 type=int, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1344 default=42, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1345 help="Random seed used for dataset splitting (default: 42)", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1346 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1347 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1348 "--learning-rate", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1349 type=parse_learning_rate, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1350 default=None, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1351 help="Learning rate. If not provided, Ludwig will auto-select it.", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1352 ) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1353 parser.add_argument( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1354 "--augmentation", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1355 type=str, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1356 default=None, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1357 help=( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1358 "Comma-separated list (in order) of any of: " |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1359 "random_horizontal_flip, random_vertical_flip, random_rotate, " |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1360 "random_blur, random_brightness, random_contrast. " |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1361 "E.g. --augmentation random_horizontal_flip,random_rotate" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1362 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1363 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1364 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1365 args = parser.parse_args() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1366 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1367 if not 0.0 <= args.validation_size <= 1.0: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1368 parser.error("validation-size must be between 0.0 and 1.0") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1369 if not args.csv_file.is_file(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1370 parser.error(f"CSV not found: {args.csv_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1371 if not args.image_zip.is_file(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1372 parser.error(f"ZIP not found: {args.image_zip}") |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1373 if args.augmentation is not None: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1374 try: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1375 augmentation_setup = aug_parse(args.augmentation) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1376 setattr(args, "augmentation", augmentation_setup) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1377 except ValueError as e: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1378 parser.error(str(e)) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1379 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1380 backend_instance = LudwigDirectBackend() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1381 orchestrator = WorkflowOrchestrator(args, backend_instance) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1382 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1383 exit_code = 0 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1384 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1385 orchestrator.run() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1386 logger.info("Main script finished successfully.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1387 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1388 logger.error(f"Main script failed.{e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1389 exit_code = 1 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1390 finally: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1391 sys.exit(exit_code) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1392 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1393 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1394 if __name__ == "__main__": |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1395 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1396 import ludwig |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1397 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1398 logger.debug(f"Found Ludwig version: {ludwig.globals.LUDWIG_VERSION}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1399 except ImportError: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1400 logger.error( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1401 "Ludwig library not found. Please ensure Ludwig is installed " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1402 "('pip install ludwig[image]')" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1403 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1404 sys.exit(1) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1405 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1406 main() |