Mercurial > repos > goeckslab > image_learner
annotate image_learner_cli.py @ 1:39202fe5cf97 draft
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
author | goeckslab |
---|---|
date | Wed, 02 Jul 2025 18:59:10 +0000 |
parents | 54b871dfc51e |
children | 186424a7eca7 |
rev | line source |
---|---|
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
2 import argparse |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
3 import json |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
4 import logging |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
5 import os |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
6 import shutil |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
7 import sys |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
8 import tempfile |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
9 import zipfile |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
10 from pathlib import Path |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
11 from typing import Any, Dict, Optional, Protocol, Tuple |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
12 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
13 import pandas as pd |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
14 import yaml |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
15 from ludwig.globals import ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
16 DESCRIPTION_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
17 PREDICTIONS_PARQUET_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
18 TEST_STATISTICS_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
19 TRAIN_SET_METADATA_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
20 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
21 from ludwig.utils.data_utils import get_split_path |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
22 from ludwig.visualize import get_visualizations_registry |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
23 from sklearn.model_selection import train_test_split |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
24 from utils import encode_image_to_base64, get_html_closing, get_html_template |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
25 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
26 # --- Constants --- |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
27 SPLIT_COLUMN_NAME = "split" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
28 LABEL_COLUMN_NAME = "label" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
29 IMAGE_PATH_COLUMN_NAME = "image_path" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
30 DEFAULT_SPLIT_PROBABILITIES = [0.7, 0.1, 0.2] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
31 TEMP_CSV_FILENAME = "processed_data_for_ludwig.csv" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
32 TEMP_CONFIG_FILENAME = "ludwig_config.yaml" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
33 TEMP_DIR_PREFIX = "ludwig_api_work_" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
34 MODEL_ENCODER_TEMPLATES: Dict[str, Any] = { |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
35 "stacked_cnn": "stacked_cnn", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
36 "resnet18": {"type": "resnet", "model_variant": 18}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
37 "resnet34": {"type": "resnet", "model_variant": 34}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
38 "resnet50": {"type": "resnet", "model_variant": 50}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
39 "resnet101": {"type": "resnet", "model_variant": 101}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
40 "resnet152": {"type": "resnet", "model_variant": 152}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
41 "resnext50_32x4d": {"type": "resnext", "model_variant": "50_32x4d"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
42 "resnext101_32x8d": {"type": "resnext", "model_variant": "101_32x8d"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
43 "resnext101_64x4d": {"type": "resnext", "model_variant": "101_64x4d"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
44 "resnext152_32x8d": {"type": "resnext", "model_variant": "152_32x8d"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
45 "wide_resnet50_2": {"type": "wide_resnet", "model_variant": "50_2"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
46 "wide_resnet101_2": {"type": "wide_resnet", "model_variant": "101_2"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
47 "wide_resnet103_2": {"type": "wide_resnet", "model_variant": "103_2"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
48 "efficientnet_b0": {"type": "efficientnet", "model_variant": "b0"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
49 "efficientnet_b1": {"type": "efficientnet", "model_variant": "b1"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
50 "efficientnet_b2": {"type": "efficientnet", "model_variant": "b2"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
51 "efficientnet_b3": {"type": "efficientnet", "model_variant": "b3"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
52 "efficientnet_b4": {"type": "efficientnet", "model_variant": "b4"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
53 "efficientnet_b5": {"type": "efficientnet", "model_variant": "b5"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
54 "efficientnet_b6": {"type": "efficientnet", "model_variant": "b6"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
55 "efficientnet_b7": {"type": "efficientnet", "model_variant": "b7"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
56 "efficientnet_v2_s": {"type": "efficientnet", "model_variant": "v2_s"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
57 "efficientnet_v2_m": {"type": "efficientnet", "model_variant": "v2_m"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
58 "efficientnet_v2_l": {"type": "efficientnet", "model_variant": "v2_l"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
59 "regnet_y_400mf": {"type": "regnet", "model_variant": "y_400mf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
60 "regnet_y_800mf": {"type": "regnet", "model_variant": "y_800mf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
61 "regnet_y_1_6gf": {"type": "regnet", "model_variant": "y_1_6gf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
62 "regnet_y_3_2gf": {"type": "regnet", "model_variant": "y_3_2gf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
63 "regnet_y_8gf": {"type": "regnet", "model_variant": "y_8gf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
64 "regnet_y_16gf": {"type": "regnet", "model_variant": "y_16gf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
65 "regnet_y_32gf": {"type": "regnet", "model_variant": "y_32gf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
66 "regnet_y_128gf": {"type": "regnet", "model_variant": "y_128gf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
67 "regnet_x_400mf": {"type": "regnet", "model_variant": "x_400mf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
68 "regnet_x_800mf": {"type": "regnet", "model_variant": "x_800mf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
69 "regnet_x_1_6gf": {"type": "regnet", "model_variant": "x_1_6gf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
70 "regnet_x_3_2gf": {"type": "regnet", "model_variant": "x_3_2gf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
71 "regnet_x_8gf": {"type": "regnet", "model_variant": "x_8gf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
72 "regnet_x_16gf": {"type": "regnet", "model_variant": "x_16gf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
73 "regnet_x_32gf": {"type": "regnet", "model_variant": "x_32gf"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
74 "vgg11": {"type": "vgg", "model_variant": 11}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
75 "vgg11_bn": {"type": "vgg", "model_variant": "11_bn"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
76 "vgg13": {"type": "vgg", "model_variant": 13}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
77 "vgg13_bn": {"type": "vgg", "model_variant": "13_bn"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
78 "vgg16": {"type": "vgg", "model_variant": 16}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
79 "vgg16_bn": {"type": "vgg", "model_variant": "16_bn"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
80 "vgg19": {"type": "vgg", "model_variant": 19}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
81 "vgg19_bn": {"type": "vgg", "model_variant": "19_bn"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
82 "shufflenet_v2_x0_5": {"type": "shufflenet_v2", "model_variant": "x0_5"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
83 "shufflenet_v2_x1_0": {"type": "shufflenet_v2", "model_variant": "x1_0"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
84 "shufflenet_v2_x1_5": {"type": "shufflenet_v2", "model_variant": "x1_5"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
85 "shufflenet_v2_x2_0": {"type": "shufflenet_v2", "model_variant": "x2_0"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
86 "squeezenet1_0": {"type": "squeezenet", "model_variant": "1_0"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
87 "squeezenet1_1": {"type": "squeezenet", "model_variant": "1_1"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
88 "swin_t": {"type": "swin_transformer", "model_variant": "t"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
89 "swin_s": {"type": "swin_transformer", "model_variant": "s"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
90 "swin_b": {"type": "swin_transformer", "model_variant": "b"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
91 "swin_v2_t": {"type": "swin_transformer", "model_variant": "v2_t"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
92 "swin_v2_s": {"type": "swin_transformer", "model_variant": "v2_s"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
93 "swin_v2_b": {"type": "swin_transformer", "model_variant": "v2_b"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
94 "vit_b_16": {"type": "vision_transformer", "model_variant": "b_16"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
95 "vit_b_32": {"type": "vision_transformer", "model_variant": "b_32"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
96 "vit_l_16": {"type": "vision_transformer", "model_variant": "l_16"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
97 "vit_l_32": {"type": "vision_transformer", "model_variant": "l_32"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
98 "vit_h_14": {"type": "vision_transformer", "model_variant": "h_14"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
99 "convnext_tiny": {"type": "convnext", "model_variant": "tiny"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
100 "convnext_small": {"type": "convnext", "model_variant": "small"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
101 "convnext_base": {"type": "convnext", "model_variant": "base"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
102 "convnext_large": {"type": "convnext", "model_variant": "large"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
103 "maxvit_t": {"type": "maxvit", "model_variant": "t"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
104 "alexnet": {"type": "alexnet"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
105 "googlenet": {"type": "googlenet"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
106 "inception_v3": {"type": "inception_v3"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
107 "mobilenet_v2": {"type": "mobilenet_v2"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
108 "mobilenet_v3_large": {"type": "mobilenet_v3_large"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
109 "mobilenet_v3_small": {"type": "mobilenet_v3_small"}, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
110 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
111 METRIC_DISPLAY_NAMES = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
112 "accuracy": "Accuracy", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
113 "accuracy_micro": "Accuracy-Micro", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
114 "loss": "Loss", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
115 "roc_auc": "ROC-AUC", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
116 "roc_auc_macro": "ROC-AUC-Macro", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
117 "roc_auc_micro": "ROC-AUC-Micro", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
118 "hits_at_k": "Hits at K", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
119 "precision": "Precision", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
120 "recall": "Recall", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
121 "specificity": "Specificity", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
122 "kappa_score": "Cohen's Kappa", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
123 "token_accuracy": "Token Accuracy", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
124 "avg_precision_macro": "Precision-Macro", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
125 "avg_recall_macro": "Recall-Macro", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
126 "avg_f1_score_macro": "F1-score-Macro", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
127 "avg_precision_micro": "Precision-Micro", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
128 "avg_recall_micro": "Recall-Micro", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
129 "avg_f1_score_micro": "F1-score-Micro", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
130 "avg_precision_weighted": "Precision-Weighted", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
131 "avg_recall_weighted": "Recall-Weighted", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
132 "avg_f1_score_weighted": "F1-score-Weighted", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
133 "average_precision_macro": " Precision-Average-Macro", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
134 "average_precision_micro": "Precision-Average-Micro", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
135 "average_precision_samples": "Precision-Average-Samples", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
136 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
137 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
138 # --- Logging Setup --- |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
139 logging.basicConfig( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
140 level=logging.INFO, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
141 format="%(asctime)s %(levelname)s %(name)s: %(message)s", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
142 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
143 logger = logging.getLogger("ImageLearner") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
144 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
145 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
146 def get_metrics_help_modal() -> str: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
147 modal_html = """ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
148 <div id="metricsHelpModal" class="modal"> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
149 <div class="modal-content"> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
150 <span class="close">×</span> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
151 <h2>Model Evaluation Metrics — Help Guide</h2> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
152 <div class="metrics-guide"> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
153 <h3>1) General Metrics</h3> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
154 <p><strong>Loss:</strong> Measures the difference between predicted and actual values. Lower is better. Often used for optimization during training.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
155 <p><strong>Accuracy:</strong> Proportion of correct predictions among all predictions. Simple but can be misleading for imbalanced datasets.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
156 <p><strong>Micro Accuracy:</strong> Calculates accuracy by summing up all individual true positives and true negatives across all classes, making it suitable for multiclass or multilabel problems.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
157 <p><strong>Token Accuracy:</strong> Measures how often the predicted tokens (e.g., in sequences) match the true tokens. Useful in sequence prediction tasks like NLP.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
158 <h3>2) Precision, Recall & Specificity</h3> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
159 <p><strong>Precision:</strong> Out of all positive predictions, how many were correct. Precision = TP / (TP + FP). Helps when false positives are costly.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
160 <p><strong>Recall (Sensitivity):</strong> Out of all actual positives, how many were predicted correctly. Recall = TP / (TP + FN). Important when missing positives is risky.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
161 <p><strong>Specificity:</strong> True negative rate. Measures how well the model identifies negatives. Specificity = TN / (TN + FP). Useful in medical testing to avoid false alarms.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
162 <h3>3) Macro, Micro, and Weighted Averages</h3> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
163 <p><strong>Macro Precision / Recall / F1:</strong> Averages the metric across all classes, treating each class equally, regardless of class frequency. Best when class sizes are balanced.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
164 <p><strong>Micro Precision / Recall / F1:</strong> Aggregates TP, FP, FN across all classes before computing the metric. Gives a global view and is ideal for class-imbalanced problems.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
165 <p><strong>Weighted Precision / Recall / F1:</strong> Averages each metric across classes, weighted by the number of true instances per class. Balances importance of classes based on frequency.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
166 <h3>4) Average Precision (PR-AUC Variants)</h3> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
167 <p><strong>Average Precision Macro:</strong> Precision-Recall AUC averaged across all classes equally. Useful for balanced multi-class problems.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
168 <p><strong>Average Precision Micro:</strong> Global Precision-Recall AUC using all instances. Best for imbalanced data or multi-label classification.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
169 <p><strong>Average Precision Samples:</strong> Precision-Recall AUC averaged across individual samples (not classes). Ideal for multi-label problems where each sample can belong to multiple classes.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
170 <h3>5) ROC-AUC Variants</h3> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
171 <p><strong>ROC-AUC:</strong> Measures model's ability to distinguish between classes. AUC = 1 is perfect; 0.5 is random guessing. Use for binary classification.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
172 <p><strong>Macro ROC-AUC:</strong> Averages the AUC across all classes equally. Suitable when classes are balanced and of equal importance.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
173 <p><strong>Micro ROC-AUC:</strong> Computes AUC from aggregated predictions across all classes. Useful in multiclass or multilabel settings with imbalance.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
174 <h3>6) Ranking Metrics</h3> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
175 <p><strong>Hits at K:</strong> Measures whether the true label is among the top-K predictions. Common in recommendation systems and retrieval tasks.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
176 <h3>7) Confusion Matrix Stats (Per Class)</h3> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
177 <p><strong>True Positives / Negatives (TP / TN):</strong> Correct predictions for positives and negatives respectively.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
178 <p><strong>False Positives / Negatives (FP / FN):</strong> Incorrect predictions — false alarms and missed detections.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
179 <h3>8) Other Useful Metrics</h3> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
180 <p><strong>Cohen's Kappa:</strong> Measures agreement between predicted and actual values adjusted for chance. Useful for multiclass classification with imbalanced labels.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
181 <p><strong>Matthews Correlation Coefficient (MCC):</strong> Balanced measure of prediction quality that takes into account TP, TN, FP, and FN. Particularly effective for imbalanced datasets.</p> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
182 <h3>9) Metric Recommendations</h3> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
183 <ul> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
184 <li>Use <strong>Accuracy + F1</strong> for balanced data.</li> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
185 <li>Use <strong>Precision, Recall, ROC-AUC</strong> for imbalanced datasets.</li> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
186 <li>Use <strong>Average Precision Micro</strong> for multilabel or class-imbalanced problems.</li> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
187 <li>Use <strong>Macro scores</strong> when all classes should be treated equally.</li> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
188 <li>Use <strong>Weighted scores</strong> when class imbalance should be accounted for without ignoring small classes.</li> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
189 <li>Use <strong>Confusion Matrix stats</strong> to analyze class-wise performance.</li> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
190 <li>Use <strong>Hits at K</strong> for recommendation or ranking-based tasks.</li> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
191 </ul> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
192 </div> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
193 </div> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
194 </div> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
195 """ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
196 modal_css = """ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
197 <style> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
198 .modal { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
199 display: none; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
200 position: fixed; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
201 z-index: 1; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
202 left: 0; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
203 top: 0; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
204 width: 100%; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
205 height: 100%; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
206 overflow: auto; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
207 background-color: rgba(0,0,0,0.4); |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
208 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
209 .modal-content { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
210 background-color: #fefefe; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
211 margin: 15% auto; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
212 padding: 20px; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
213 border: 1px solid #888; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
214 width: 80%; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
215 max-width: 800px; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
216 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
217 .close { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
218 color: #aaa; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
219 float: right; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
220 font-size: 28px; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
221 font-weight: bold; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
222 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
223 .close:hover, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
224 .close:focus { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
225 color: black; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
226 text-decoration: none; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
227 cursor: pointer; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
228 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
229 .metrics-guide h3 { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
230 margin-top: 20px; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
231 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
232 .metrics-guide p { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
233 margin: 5px 0; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
234 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
235 .metrics-guide ul { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
236 margin: 10px 0; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
237 padding-left: 20px; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
238 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
239 </style> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
240 """ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
241 modal_js = """ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
242 <script> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
243 document.addEventListener("DOMContentLoaded", function() { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
244 var modal = document.getElementById("metricsHelpModal"); |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
245 var closeBtn = document.getElementsByClassName("close")[0]; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
246 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
247 document.querySelectorAll(".openMetricsHelp").forEach(btn => { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
248 btn.onclick = function() { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
249 modal.style.display = "block"; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
250 }; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
251 }); |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
252 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
253 if (closeBtn) { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
254 closeBtn.onclick = function() { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
255 modal.style.display = "none"; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
256 }; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
257 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
258 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
259 window.onclick = function(event) { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
260 if (event.target == modal) { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
261 modal.style.display = "none"; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
262 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
263 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
264 }); |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
265 </script> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
266 """ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
267 return modal_css + modal_html + modal_js |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
268 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
269 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
270 def format_config_table_html( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
271 config: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
272 split_info: Optional[str] = None, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
273 training_progress: dict = None, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
274 ) -> str: |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
275 display_keys = [ |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
276 "model_name", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
277 "epochs", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
278 "batch_size", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
279 "fine_tune", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
280 "use_pretrained", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
281 "learning_rate", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
282 "random_seed", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
283 "early_stop", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
284 ] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
285 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
286 rows = [] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
287 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
288 for key in display_keys: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
289 val = config.get(key, "N/A") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
290 if key == "batch_size": |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
291 if val is not None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
292 val = int(val) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
293 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
294 if training_progress: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
295 val = "Auto-selected batch size by Ludwig:<br>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
296 resolved_val = training_progress.get("batch_size") |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
297 val += f"<span style='font-size: 0.85em;'>{resolved_val}</span><br>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
298 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
299 val = "auto" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
300 if key == "learning_rate": |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
301 resolved_val = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
302 if val is None or val == "auto": |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
303 if training_progress: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
304 resolved_val = training_progress.get("learning_rate") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
305 val = ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
306 "Auto-selected learning rate by Ludwig:<br>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
307 f"<span style='font-size: 0.85em;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
308 f"{resolved_val if resolved_val else val}</span><br>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
309 "<span style='font-size: 0.85em;'>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
310 "Based on model architecture and training setup " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
311 "(e.g., fine-tuning).<br>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
312 "See <a href='https://ludwig.ai/latest/configuration/trainer/" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
313 "#trainer-parameters' target='_blank'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
314 "Ludwig Trainer Parameters</a> for details." |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
315 "</span>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
316 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
317 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
318 val = ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
319 "Auto-selected by Ludwig<br>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
320 "<span style='font-size: 0.85em;'>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
321 "Automatically tuned based on architecture and dataset.<br>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
322 "See <a href='https://ludwig.ai/latest/configuration/trainer/" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
323 "#trainer-parameters' target='_blank'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
324 "Ludwig Trainer Parameters</a> for details." |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
325 "</span>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
326 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
327 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
328 val = f"{val:.6f}" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
329 if key == "epochs": |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
330 if ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
331 training_progress |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
332 and "epoch" in training_progress |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
333 and val > training_progress["epoch"] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
334 ): |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
335 val = ( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
336 f"Because of early stopping: the training " |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
337 f"stopped at epoch {training_progress['epoch']}" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
338 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
339 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
340 if val is None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
341 continue |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
342 rows.append( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
343 f"<tr>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
344 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left;'>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
345 f"{key.replace('_', ' ').title()}</td>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
346 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
347 f"{val}</td>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
348 f"</tr>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
349 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
350 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
351 if split_info: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
352 rows.append( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
353 f"<tr>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
354 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
355 f"Data Split</td>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
356 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
357 f"{split_info}</td>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
358 f"</tr>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
359 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
360 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
361 return ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
362 "<h2 style='text-align: center;'>Training Setup</h2>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
363 "<div style='display: flex; justify-content: center;'>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
364 "<table style='border-collapse: collapse; width: 60%; table-layout: auto;'>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
365 "<thead><tr>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
366 "<th style='padding: 10px; border: 1px solid #ccc; text-align: left;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
367 "Parameter</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
368 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
369 "Value</th>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
370 "</tr></thead><tbody>" + "".join(rows) + "</tbody></table></div><br>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
371 "<p style='text-align: center; font-size: 0.9em;'>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
372 "Model trained using Ludwig.<br>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
373 "If want to learn more about Ludwig default settings," |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
374 "please check the their <a href='https://ludwig.ai' target='_blank'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
375 "website(ludwig.ai)</a>." |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
376 "</p><hr>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
377 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
378 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
379 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
380 def detect_output_type(test_stats): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
381 """Detects if the output type is 'binary' or 'category' based on test statistics.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
382 label_stats = test_stats.get("label", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
383 per_class = label_stats.get("per_class_stats", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
384 if len(per_class) == 2: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
385 return "binary" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
386 return "category" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
387 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
388 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
389 def extract_metrics_from_json( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
390 train_stats: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
391 test_stats: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
392 output_type: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
393 ) -> dict: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
394 """Extracts relevant metrics from training and test statistics based on the output type.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
395 metrics = {"training": {}, "validation": {}, "test": {}} |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
396 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
397 def get_last_value(stats, key): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
398 val = stats.get(key) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
399 if isinstance(val, list) and val: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
400 return val[-1] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
401 elif isinstance(val, (int, float)): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
402 return val |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
403 return None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
404 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
405 for split in ["training", "validation"]: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
406 split_stats = train_stats.get(split, {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
407 if not split_stats: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
408 logging.warning(f"No statistics found for {split} split") |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
409 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
410 label_stats = split_stats.get("label", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
411 if not label_stats: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
412 logging.warning(f"No label statistics found for {split} split") |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
413 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
414 if output_type == "binary": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
415 metrics[split] = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
416 "accuracy": get_last_value(label_stats, "accuracy"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
417 "loss": get_last_value(label_stats, "loss"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
418 "precision": get_last_value(label_stats, "precision"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
419 "recall": get_last_value(label_stats, "recall"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
420 "specificity": get_last_value(label_stats, "specificity"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
421 "roc_auc": get_last_value(label_stats, "roc_auc"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
422 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
423 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
424 metrics[split] = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
425 "accuracy": get_last_value(label_stats, "accuracy"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
426 "accuracy_micro": get_last_value(label_stats, "accuracy_micro"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
427 "loss": get_last_value(label_stats, "loss"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
428 "roc_auc": get_last_value(label_stats, "roc_auc"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
429 "hits_at_k": get_last_value(label_stats, "hits_at_k"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
430 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
431 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
432 # Test metrics: dynamic extraction according to exclusions |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
433 test_label_stats = test_stats.get("label", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
434 if not test_label_stats: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
435 logging.warning("No label statistics found for test split") |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
436 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
437 combined_stats = test_stats.get("combined", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
438 overall_stats = test_label_stats.get("overall_stats", {}) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
439 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
440 # Define exclusions |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
441 if output_type == "binary": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
442 exclude = {"per_class_stats", "precision_recall_curve", "roc_curve"} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
443 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
444 exclude = {"per_class_stats", "confusion_matrix"} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
445 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
446 # 1. Get all scalar test_label_stats not excluded |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
447 test_metrics = {} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
448 for k, v in test_label_stats.items(): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
449 if k in exclude: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
450 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
451 if k == "overall_stats": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
452 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
453 if isinstance(v, (int, float, str, bool)): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
454 test_metrics[k] = v |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
455 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
456 # 2. Add overall_stats (flattened) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
457 for k, v in overall_stats.items(): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
458 test_metrics[k] = v |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
459 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
460 # 3. Optionally include combined/loss if present and not already |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
461 if "loss" in combined_stats and "loss" not in test_metrics: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
462 test_metrics["loss"] = combined_stats["loss"] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
463 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
464 metrics["test"] = test_metrics |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
465 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
466 return metrics |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
467 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
468 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
469 def generate_table_row(cells, styles): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
470 """Helper function to generate an HTML table row.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
471 return ( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
472 "<tr>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
473 + "".join(f"<td style='{styles}'>{cell}</td>" for cell in cells) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
474 + "</tr>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
475 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
476 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
477 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
478 def format_stats_table_html(train_stats: dict, test_stats: dict) -> str: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
479 """Formats a combined HTML table for training, validation, and test metrics.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
480 output_type = detect_output_type(test_stats) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
481 all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
482 rows = [] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
483 for metric_key in sorted(all_metrics["training"].keys()): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
484 if ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
485 metric_key in all_metrics["validation"] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
486 and metric_key in all_metrics["test"] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
487 ): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
488 display_name = METRIC_DISPLAY_NAMES.get( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
489 metric_key, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
490 metric_key.replace("_", " ").title(), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
491 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
492 t = all_metrics["training"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
493 v = all_metrics["validation"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
494 te = all_metrics["test"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
495 if all(x is not None for x in [t, v, te]): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
496 rows.append([display_name, f"{t:.4f}", f"{v:.4f}", f"{te:.4f}"]) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
497 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
498 if not rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
499 return "<table><tr><td>No metric values found.</td></tr></table>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
500 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
501 html = ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
502 "<h2 style='text-align: center;'>Model Performance Summary</h2>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
503 "<div style='display: flex; justify-content: center;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
504 "<table style='border-collapse: collapse; table-layout: auto;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
505 "<thead><tr>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
506 "<th style='padding: 10px; border: 1px solid #ccc; text-align: left; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
507 "white-space: nowrap;'>Metric</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
508 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
509 "white-space: nowrap;'>Train</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
510 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
511 "white-space: nowrap;'>Validation</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
512 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
513 "white-space: nowrap;'>Test</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
514 "</tr></thead><tbody>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
515 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
516 for row in rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
517 html += generate_table_row( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
518 row, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
519 "padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
520 "white-space: nowrap;", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
521 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
522 html += "</tbody></table></div><br>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
523 return html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
524 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
525 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
526 def format_train_val_stats_table_html(train_stats: dict, test_stats: dict) -> str: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
527 """Formats an HTML table for training and validation metrics.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
528 output_type = detect_output_type(test_stats) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
529 all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
530 rows = [] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
531 for metric_key in sorted(all_metrics["training"].keys()): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
532 if metric_key in all_metrics["validation"]: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
533 display_name = METRIC_DISPLAY_NAMES.get( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
534 metric_key, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
535 metric_key.replace("_", " ").title(), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
536 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
537 t = all_metrics["training"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
538 v = all_metrics["validation"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
539 if t is not None and v is not None: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
540 rows.append([display_name, f"{t:.4f}", f"{v:.4f}"]) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
541 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
542 if not rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
543 return "<table><tr><td>No metric values found for Train/Validation.</td></tr></table>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
544 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
545 html = ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
546 "<h2 style='text-align: center;'>Train/Validation Performance Summary</h2>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
547 "<div style='display: flex; justify-content: center;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
548 "<table style='border-collapse: collapse; table-layout: auto;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
549 "<thead><tr>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
550 "<th style='padding: 10px; border: 1px solid #ccc; text-align: left; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
551 "white-space: nowrap;'>Metric</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
552 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
553 "white-space: nowrap;'>Train</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
554 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
555 "white-space: nowrap;'>Validation</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
556 "</tr></thead><tbody>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
557 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
558 for row in rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
559 html += generate_table_row( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
560 row, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
561 "padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
562 "white-space: nowrap;", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
563 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
564 html += "</tbody></table></div><br>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
565 return html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
566 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
567 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
568 def format_test_merged_stats_table_html(test_metrics: Dict[str, Optional[float]]) -> str: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
569 """Formats an HTML table for test metrics.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
570 rows = [] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
571 for key in sorted(test_metrics.keys()): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
572 display_name = METRIC_DISPLAY_NAMES.get(key, key.replace("_", " ").title()) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
573 value = test_metrics[key] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
574 if value is not None: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
575 rows.append([display_name, f"{value:.4f}"]) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
576 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
577 if not rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
578 return "<table><tr><td>No test metric values found.</td></tr></table>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
579 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
580 html = ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
581 "<h2 style='text-align: center;'>Test Performance Summary</h2>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
582 "<div style='display: flex; justify-content: center;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
583 "<table style='border-collapse: collapse; table-layout: auto;'>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
584 "<thead><tr>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
585 "<th style='padding: 10px; border: 1px solid #ccc; text-align: left; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
586 "white-space: nowrap;'>Metric</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
587 "<th style='padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
588 "white-space: nowrap;'>Test</th>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
589 "</tr></thead><tbody>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
590 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
591 for row in rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
592 html += generate_table_row( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
593 row, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
594 "padding: 10px; border: 1px solid #ccc; text-align: center; " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
595 "white-space: nowrap;", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
596 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
597 html += "</tbody></table></div><br>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
598 return html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
599 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
600 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
601 def build_tabbed_html(metrics_html: str, train_val_html: str, test_html: str) -> str: |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
602 return f""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
603 <style> |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
604 .tabs {{ |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
605 display: flex; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
606 border-bottom: 2px solid #ccc; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
607 margin-bottom: 1rem; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
608 }} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
609 .tab {{ |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
610 padding: 10px 20px; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
611 cursor: pointer; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
612 border: 1px solid #ccc; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
613 border-bottom: none; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
614 background: #f9f9f9; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
615 margin-right: 5px; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
616 border-top-left-radius: 8px; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
617 border-top-right-radius: 8px; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
618 }} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
619 .tab.active {{ |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
620 background: white; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
621 font-weight: bold; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
622 }} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
623 .tab-content {{ |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
624 display: none; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
625 padding: 20px; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
626 border: 1px solid #ccc; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
627 border-top: none; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
628 }} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
629 .tab-content.active {{ |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
630 display: block; |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
631 }} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
632 </style> |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
633 <div class="tabs"> |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
634 <div class="tab active" onclick="showTab('metrics')"> Config & Results Summary</div> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
635 <div class="tab" onclick="showTab('trainval')"> Train/Validation Results</div> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
636 <div class="tab" onclick="showTab('test')"> Test Results</div> |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
637 </div> |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
638 <div id="metrics" class="tab-content active"> |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
639 {metrics_html} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
640 </div> |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
641 <div id="trainval" class="tab-content"> |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
642 {train_val_html} |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
643 </div> |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
644 <div id="test" class="tab-content"> |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
645 {test_html} |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
646 </div> |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
647 <script> |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
648 function showTab(id) {{ |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
649 document.querySelectorAll('.tab-content').forEach(el => el.classList.remove('active')); |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
650 document.querySelectorAll('.tab').forEach(el => el.classList.remove('active')); |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
651 document.getElementById(id).classList.add('active'); |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
652 document.querySelector(`.tab[onclick*="${{id}}"]`).classList.add('active'); |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
653 }} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
654 </script> |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
655 """ |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
656 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
657 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
658 def split_data_0_2( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
659 df: pd.DataFrame, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
660 split_column: str, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
661 validation_size: float = 0.15, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
662 random_state: int = 42, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
663 label_column: Optional[str] = None, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
664 ) -> pd.DataFrame: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
665 """Given a DataFrame whose split_column only contains {0,2}, re-assign a portion of the 0s to become 1s (validation).""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
666 out = df.copy() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
667 out[split_column] = pd.to_numeric(out[split_column], errors="coerce").astype(int) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
668 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
669 idx_train = out.index[out[split_column] == 0].tolist() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
670 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
671 if not idx_train: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
672 logger.info("No rows with split=0; nothing to do.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
673 return out |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
674 stratify_arr = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
675 if label_column and label_column in out.columns: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
676 label_counts = out.loc[idx_train, label_column].value_counts() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
677 if label_counts.size > 1 and (label_counts.min() * validation_size) >= 1: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
678 stratify_arr = out.loc[idx_train, label_column] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
679 else: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
680 logger.warning( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
681 "Cannot stratify (too few labels); splitting without stratify." |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
682 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
683 if validation_size <= 0: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
684 logger.info("validation_size <= 0; keeping all as train.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
685 return out |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
686 if validation_size >= 1: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
687 logger.info("validation_size >= 1; moving all train → validation.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
688 out.loc[idx_train, split_column] = 1 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
689 return out |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
690 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
691 train_idx, val_idx = train_test_split( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
692 idx_train, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
693 test_size=validation_size, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
694 random_state=random_state, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
695 stratify=stratify_arr, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
696 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
697 except ValueError as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
698 logger.warning(f"Stratified split failed ({e}); retrying without stratify.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
699 train_idx, val_idx = train_test_split( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
700 idx_train, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
701 test_size=validation_size, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
702 random_state=random_state, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
703 stratify=None, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
704 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
705 out.loc[train_idx, split_column] = 0 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
706 out.loc[val_idx, split_column] = 1 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
707 out[split_column] = out[split_column].astype(int) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
708 return out |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
709 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
710 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
711 class Backend(Protocol): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
712 """Interface for a machine learning backend.""" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
713 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
714 def prepare_config( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
715 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
716 config_params: Dict[str, Any], |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
717 split_config: Dict[str, Any], |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
718 ) -> str: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
719 ... |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
720 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
721 def run_experiment( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
722 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
723 dataset_path: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
724 config_path: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
725 output_dir: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
726 random_seed: int, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
727 ) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
728 ... |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
729 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
730 def generate_plots( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
731 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
732 output_dir: Path |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
733 ) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
734 ... |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
735 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
736 def generate_html_report( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
737 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
738 title: str, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
739 output_dir: str |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
740 ) -> Path: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
741 ... |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
742 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
743 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
744 class LudwigDirectBackend: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
745 """Backend for running Ludwig experiments directly via the internal experiment_cli function.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
746 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
747 def prepare_config( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
748 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
749 config_params: Dict[str, Any], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
750 split_config: Dict[str, Any], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
751 ) -> str: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
752 """Build and serialize the Ludwig YAML configuration.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
753 logger.info("LudwigDirectBackend: Preparing YAML configuration.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
754 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
755 model_name = config_params.get("model_name", "resnet18") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
756 use_pretrained = config_params.get("use_pretrained", False) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
757 fine_tune = config_params.get("fine_tune", False) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
758 epochs = config_params.get("epochs", 10) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
759 batch_size = config_params.get("batch_size") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
760 num_processes = config_params.get("preprocessing_num_processes", 1) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
761 early_stop = config_params.get("early_stop", None) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
762 learning_rate = config_params.get("learning_rate") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
763 learning_rate = "auto" if learning_rate is None else float(learning_rate) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
764 trainable = fine_tune or (not use_pretrained) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
765 if not use_pretrained and not trainable: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
766 logger.warning("trainable=False; use_pretrained=False is ignored.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
767 logger.warning("Setting trainable=True to train the model from scratch.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
768 trainable = True |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
769 raw_encoder = MODEL_ENCODER_TEMPLATES.get(model_name, model_name) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
770 if isinstance(raw_encoder, dict): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
771 encoder_config = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
772 **raw_encoder, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
773 "use_pretrained": use_pretrained, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
774 "trainable": trainable, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
775 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
776 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
777 encoder_config = {"type": raw_encoder} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
778 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
779 batch_size_cfg = batch_size or "auto" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
780 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
781 label_column_path = config_params.get("label_column_data_path") |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
782 if label_column_path is not None and Path(label_column_path).exists(): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
783 try: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
784 label_series = pd.read_csv(label_column_path)[LABEL_COLUMN_NAME] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
785 num_unique_labels = label_series.nunique() |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
786 except Exception as e: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
787 logger.warning( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
788 f"Could not determine label cardinality, defaulting to 'binary': {e}" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
789 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
790 num_unique_labels = 2 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
791 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
792 logger.warning( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
793 "label_column_data_path not provided, defaulting to 'binary'" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
794 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
795 num_unique_labels = 2 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
796 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
797 output_type = "binary" if num_unique_labels == 2 else "category" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
798 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
799 conf: Dict[str, Any] = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
800 "model_type": "ecd", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
801 "input_features": [ |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
802 { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
803 "name": IMAGE_PATH_COLUMN_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
804 "type": "image", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
805 "encoder": encoder_config, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
806 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
807 ], |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
808 "output_features": [{"name": LABEL_COLUMN_NAME, "type": output_type}], |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
809 "combiner": {"type": "concat"}, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
810 "trainer": { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
811 "epochs": epochs, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
812 "early_stop": early_stop, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
813 "batch_size": batch_size_cfg, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
814 "learning_rate": learning_rate, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
815 }, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
816 "preprocessing": { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
817 "split": split_config, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
818 "num_processes": num_processes, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
819 "in_memory": False, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
820 }, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
821 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
822 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
823 logger.debug("LudwigDirectBackend: Config dict built.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
824 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
825 yaml_str = yaml.dump(conf, sort_keys=False, indent=2) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
826 logger.info("LudwigDirectBackend: YAML config generated.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
827 return yaml_str |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
828 except Exception: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
829 logger.error( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
830 "LudwigDirectBackend: Failed to serialize YAML.", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
831 exc_info=True, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
832 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
833 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
834 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
835 def run_experiment( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
836 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
837 dataset_path: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
838 config_path: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
839 output_dir: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
840 random_seed: int = 42, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
841 ) -> None: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
842 """Invoke Ludwig's internal experiment_cli function to run the experiment.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
843 logger.info("LudwigDirectBackend: Starting experiment execution.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
844 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
845 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
846 from ludwig.experiment import experiment_cli |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
847 except ImportError as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
848 logger.error( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
849 "LudwigDirectBackend: Could not import experiment_cli.", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
850 exc_info=True, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
851 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
852 raise RuntimeError("Ludwig import failed.") from e |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
853 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
854 output_dir.mkdir(parents=True, exist_ok=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
855 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
856 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
857 experiment_cli( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
858 dataset=str(dataset_path), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
859 config=str(config_path), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
860 output_directory=str(output_dir), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
861 random_seed=random_seed, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
862 ) |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
863 logger.info( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
864 f"LudwigDirectBackend: Experiment completed. Results in {output_dir}" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
865 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
866 except TypeError as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
867 logger.error( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
868 "LudwigDirectBackend: Argument mismatch in experiment_cli call.", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
869 exc_info=True, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
870 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
871 raise RuntimeError("Ludwig argument error.") from e |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
872 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
873 logger.error( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
874 "LudwigDirectBackend: Experiment execution error.", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
875 exc_info=True, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
876 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
877 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
878 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
879 def get_training_process(self, output_dir) -> float: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
880 """Retrieve the learning rate used in the most recent Ludwig run.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
881 output_dir = Path(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
882 exp_dirs = sorted( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
883 output_dir.glob("experiment_run*"), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
884 key=lambda p: p.stat().st_mtime, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
885 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
886 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
887 if not exp_dirs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
888 logger.warning(f"No experiment run directories found in {output_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
889 return None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
890 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
891 progress_file = exp_dirs[-1] / "model" / "training_progress.json" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
892 if not progress_file.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
893 logger.warning(f"No training_progress.json found in {progress_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
894 return None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
895 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
896 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
897 with progress_file.open("r", encoding="utf-8") as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
898 data = json.load(f) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
899 return { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
900 "learning_rate": data.get("learning_rate"), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
901 "batch_size": data.get("batch_size"), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
902 "epoch": data.get("epoch"), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
903 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
904 except Exception as e: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
905 logger.warning(f"Failed to read training progress info: {e}") |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
906 return {} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
907 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
908 def convert_parquet_to_csv(self, output_dir: Path): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
909 """Convert the predictions Parquet file to CSV.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
910 output_dir = Path(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
911 exp_dirs = sorted( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
912 output_dir.glob("experiment_run*"), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
913 key=lambda p: p.stat().st_mtime, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
914 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
915 if not exp_dirs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
916 logger.warning(f"No experiment run dirs found in {output_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
917 return |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
918 exp_dir = exp_dirs[-1] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
919 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
920 csv_path = exp_dir / "predictions.csv" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
921 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
922 df = pd.read_parquet(parquet_path) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
923 df.to_csv(csv_path, index=False) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
924 logger.info(f"Converted Parquet to CSV: {csv_path}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
925 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
926 logger.error(f"Error converting Parquet to CSV: {e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
927 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
928 def generate_plots(self, output_dir: Path) -> None: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
929 """Generate all registered Ludwig visualizations for the latest experiment run.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
930 logger.info("Generating all Ludwig visualizations…") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
931 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
932 test_plots = { |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
933 "compare_performance", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
934 "compare_classifiers_performance_from_prob", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
935 "compare_classifiers_performance_from_pred", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
936 "compare_classifiers_performance_changing_k", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
937 "compare_classifiers_multiclass_multimetric", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
938 "compare_classifiers_predictions", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
939 "confidence_thresholding_2thresholds_2d", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
940 "confidence_thresholding_2thresholds_3d", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
941 "confidence_thresholding", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
942 "confidence_thresholding_data_vs_acc", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
943 "binary_threshold_vs_metric", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
944 "roc_curves", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
945 "roc_curves_from_test_statistics", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
946 "calibration_1_vs_all", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
947 "calibration_multiclass", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
948 "confusion_matrix", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
949 "frequency_vs_f1", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
950 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
951 train_plots = { |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
952 "learning_curves", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
953 "compare_classifiers_performance_subset", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
954 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
955 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
956 output_dir = Path(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
957 exp_dirs = sorted( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
958 output_dir.glob("experiment_run*"), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
959 key=lambda p: p.stat().st_mtime, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
960 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
961 if not exp_dirs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
962 logger.warning(f"No experiment run dirs found in {output_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
963 return |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
964 exp_dir = exp_dirs[-1] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
965 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
966 viz_dir = exp_dir / "visualizations" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
967 viz_dir.mkdir(exist_ok=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
968 train_viz = viz_dir / "train" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
969 test_viz = viz_dir / "test" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
970 train_viz.mkdir(parents=True, exist_ok=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
971 test_viz.mkdir(parents=True, exist_ok=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
972 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
973 def _check(p: Path) -> Optional[str]: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
974 return str(p) if p.exists() else None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
975 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
976 training_stats = _check(exp_dir / "training_statistics.json") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
977 test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
978 probs_path = _check(exp_dir / PREDICTIONS_PARQUET_FILE_NAME) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
979 gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
980 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
981 dataset_path = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
982 split_file = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
983 desc = exp_dir / DESCRIPTION_FILE_NAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
984 if desc.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
985 with open(desc, "r") as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
986 cfg = json.load(f) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
987 dataset_path = _check(Path(cfg.get("dataset", ""))) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
988 split_file = _check(Path(get_split_path(cfg.get("dataset", "")))) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
989 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
990 output_feature = "" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
991 if desc.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
992 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
993 output_feature = cfg["config"]["output_features"][0]["name"] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
994 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
995 pass |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
996 if not output_feature and test_stats: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
997 with open(test_stats, "r") as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
998 stats = json.load(f) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
999 output_feature = next(iter(stats.keys()), "") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1000 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1001 viz_registry = get_visualizations_registry() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1002 for viz_name, viz_func in viz_registry.items(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1003 viz_dir_plot = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1004 if viz_name in train_plots: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1005 viz_dir_plot = train_viz |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1006 elif viz_name in test_plots: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1007 viz_dir_plot = test_viz |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1008 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1009 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1010 viz_func( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1011 training_statistics=[training_stats] if training_stats else [], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1012 test_statistics=[test_stats] if test_stats else [], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1013 probabilities=[probs_path] if probs_path else [], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1014 output_feature_name=output_feature, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1015 ground_truth_split=2, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1016 top_n_classes=[0], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1017 top_k=3, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1018 ground_truth_metadata=gt_metadata, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1019 ground_truth=dataset_path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1020 split_file=split_file, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1021 output_directory=str(viz_dir_plot), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1022 normalize=False, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1023 file_format="png", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1024 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1025 logger.info(f"✔ Generated {viz_name}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1026 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1027 logger.warning(f"✘ Skipped {viz_name}: {e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1028 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1029 logger.info(f"All visualizations written to {viz_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1030 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1031 def generate_html_report( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1032 self, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1033 title: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1034 output_dir: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1035 config: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1036 split_info: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1037 ) -> Path: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1038 """Assemble an HTML report from visualizations under train_val/ and test/ folders.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1039 cwd = Path.cwd() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1040 report_name = title.lower().replace(" ", "_") + "_report.html" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1041 report_path = cwd / report_name |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1042 output_dir = Path(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1043 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1044 exp_dirs = sorted( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1045 output_dir.glob("experiment_run*"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1046 key=lambda p: p.stat().st_mtime, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1047 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1048 if not exp_dirs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1049 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1050 exp_dir = exp_dirs[-1] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1051 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1052 base_viz_dir = exp_dir / "visualizations" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1053 train_viz_dir = base_viz_dir / "train" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1054 test_viz_dir = base_viz_dir / "test" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1055 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1056 html = get_html_template() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1057 html += f"<h1>{title}</h1>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1058 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1059 metrics_html = "" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1060 train_val_metrics_html = "" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1061 test_metrics_html = "" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1062 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1063 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1064 train_stats_path = exp_dir / "training_statistics.json" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1065 test_stats_path = exp_dir / TEST_STATISTICS_FILE_NAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1066 if train_stats_path.exists() and test_stats_path.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1067 with open(train_stats_path) as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1068 train_stats = json.load(f) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1069 with open(test_stats_path) as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1070 test_stats = json.load(f) |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1071 output_type = detect_output_type(test_stats) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1072 all_metrics = extract_metrics_from_json( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1073 train_stats, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1074 test_stats, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1075 output_type, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1076 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1077 metrics_html = format_stats_table_html(train_stats, test_stats) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1078 train_val_metrics_html = format_train_val_stats_table_html( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1079 train_stats, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1080 test_stats, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1081 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1082 test_metrics_html = format_test_merged_stats_table_html( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1083 all_metrics["test"], |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1084 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1085 except Exception as e: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1086 logger.warning( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1087 f"Could not load stats for HTML report: {type(e).__name__}: {e}" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1088 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1089 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1090 config_html = "" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1091 training_progress = self.get_training_process(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1092 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1093 config_html = format_config_table_html(config, split_info, training_progress) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1094 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1095 logger.warning(f"Could not load config for HTML report: {e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1096 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1097 def render_img_section(title: str, dir_path: Path, output_type: str = None) -> str: |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1098 if not dir_path.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1099 return f"<h2>{title}</h2><p><em>Directory not found.</em></p>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1100 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1101 imgs = list(dir_path.glob("*.png")) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1102 if not imgs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1103 return f"<h2>{title}</h2><p><em>No plots found.</em></p>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1104 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1105 if title == "Test Visualizations" and output_type == "binary": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1106 order = [ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1107 "confusion_matrix__label_top2.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1108 "roc_curves_from_prediction_statistics.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1109 "compare_performance_label.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1110 "confusion_matrix_entropy__label_top2.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1111 ] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1112 img_names = {img.name: img for img in imgs} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1113 ordered_imgs = [ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1114 img_names[fname] for fname in order if fname in img_names |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1115 ] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1116 remaining = sorted( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1117 [ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1118 img |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1119 for img in imgs |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1120 if img.name not in order and img.name != "roc_curves.png" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1121 ] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1122 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1123 imgs = ordered_imgs + remaining |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1124 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1125 elif title == "Test Visualizations" and output_type == "category": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1126 unwanted = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1127 "compare_classifiers_multiclass_multimetric__label_best10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1128 "compare_classifiers_multiclass_multimetric__label_top10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1129 "compare_classifiers_multiclass_multimetric__label_worst10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1130 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1131 display_order = [ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1132 "confusion_matrix__label_top10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1133 "roc_curves.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1134 "compare_performance_label.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1135 "compare_classifiers_performance_from_prob.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1136 "compare_classifiers_multiclass_multimetric__label_sorted.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1137 "confusion_matrix_entropy__label_top10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1138 ] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1139 img_names = {img.name: img for img in imgs if img.name not in unwanted} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1140 ordered_imgs = [ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1141 img_names[fname] for fname in display_order if fname in img_names |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1142 ] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1143 remaining = sorted( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1144 [ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1145 img |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1146 for img in img_names.values() |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1147 if img.name not in display_order |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1148 ] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1149 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1150 imgs = ordered_imgs + remaining |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1151 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1152 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1153 if output_type == "category": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1154 unwanted = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1155 "compare_classifiers_multiclass_multimetric__label_best10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1156 "compare_classifiers_multiclass_multimetric__label_top10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1157 "compare_classifiers_multiclass_multimetric__label_worst10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1158 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1159 imgs = sorted([img for img in imgs if img.name not in unwanted]) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1160 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1161 imgs = sorted(imgs) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1162 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1163 section_html = f"<h2 style='text-align: center;'>{title}</h2><div>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1164 for img in imgs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1165 b64 = encode_image_to_base64(str(img)) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1166 section_html += ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1167 f'<div class="plot" style="margin-bottom:20px;text-align:center;">' |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1168 f"<h3>{img.stem.replace('_', ' ').title()}</h3>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1169 f'<img src="data:image/png;base64,{b64}" ' |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1170 f'style="max-width:90%;max-height:600px;border:1px solid #ddd;" />' |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1171 f"</div>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1172 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1173 section_html += "</div>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1174 return section_html |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1175 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1176 button_html = """ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1177 <button class="help-modal-btn openMetricsHelp">Model Evaluation Metrics — Help Guide</button> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1178 <br><br> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1179 <style> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1180 .help-modal-btn { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1181 background-color: #17623b; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1182 color: #fff; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1183 border: none; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1184 border-radius: 24px; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1185 padding: 10px 28px; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1186 font-size: 1.1rem; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1187 font-weight: bold; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1188 letter-spacing: 0.03em; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1189 cursor: pointer; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1190 transition: background 0.2s, box-shadow 0.2s; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1191 box-shadow: 0 2px 8px rgba(23,98,59,0.07); |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1192 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1193 .help-modal-btn:hover, .help-modal-btn:focus { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1194 background-color: #21895e; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1195 outline: none; |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1196 box-shadow: 0 4px 16px rgba(23,98,59,0.14); |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1197 } |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1198 </style> |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1199 """ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1200 tab1_content = button_html + config_html + metrics_html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1201 tab2_content = ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1202 button_html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1203 + train_val_metrics_html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1204 + render_img_section("Training & Validation Visualizations", train_viz_dir) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1205 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1206 tab3_content = ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1207 button_html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1208 + test_metrics_html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1209 + render_img_section("Test Visualizations", test_viz_dir, output_type) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1210 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1211 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1212 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1213 modal_html = get_metrics_help_modal() |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1214 html += tabbed_html + modal_html |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1215 html += get_html_closing() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1216 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1217 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1218 with open(report_path, "w") as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1219 f.write(html) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1220 logger.info(f"HTML report generated at: {report_path}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1221 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1222 logger.error(f"Failed to write HTML report: {e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1223 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1224 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1225 return report_path |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1226 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1227 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1228 class WorkflowOrchestrator: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1229 """Manages the image-classification workflow.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1230 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1231 def __init__(self, args: argparse.Namespace, backend: Backend): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1232 self.args = args |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1233 self.backend = backend |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1234 self.temp_dir: Optional[Path] = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1235 self.image_extract_dir: Optional[Path] = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1236 logger.info(f"Orchestrator initialized with backend: {type(backend).__name__}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1237 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1238 def _create_temp_dirs(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1239 """Create temporary output and image extraction directories.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1240 try: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1241 self.temp_dir = Path( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1242 tempfile.mkdtemp(dir=self.args.output_dir, prefix=TEMP_DIR_PREFIX) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1243 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1244 self.image_extract_dir = self.temp_dir / "images" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1245 self.image_extract_dir.mkdir() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1246 logger.info(f"Created temp directory: {self.temp_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1247 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1248 logger.error("Failed to create temporary directories", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1249 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1250 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1251 def _extract_images(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1252 """Extract images from ZIP into the temp image directory.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1253 if self.image_extract_dir is None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1254 raise RuntimeError("Temp image directory not initialized.") |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1255 logger.info( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1256 f"Extracting images from {self.args.image_zip} → {self.image_extract_dir}" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1257 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1258 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1259 with zipfile.ZipFile(self.args.image_zip, "r") as z: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1260 z.extractall(self.image_extract_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1261 logger.info("Image extraction complete.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1262 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1263 logger.error("Error extracting zip file", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1264 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1265 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1266 def _prepare_data(self) -> Tuple[Path, Dict[str, Any]]: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1267 """Load CSV, update image paths, handle splits, and write prepared CSV.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1268 if not self.temp_dir or not self.image_extract_dir: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1269 raise RuntimeError("Temp dirs not initialized before data prep.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1270 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1271 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1272 df = pd.read_csv(self.args.csv_file) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1273 logger.info(f"Loaded CSV: {self.args.csv_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1274 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1275 logger.error("Error loading CSV file", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1276 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1277 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1278 required = {IMAGE_PATH_COLUMN_NAME, LABEL_COLUMN_NAME} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1279 missing = required - set(df.columns) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1280 if missing: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1281 raise ValueError(f"Missing CSV columns: {', '.join(missing)}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1282 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1283 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1284 df[IMAGE_PATH_COLUMN_NAME] = df[IMAGE_PATH_COLUMN_NAME].apply( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1285 lambda p: str((self.image_extract_dir / p).resolve()) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1286 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1287 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1288 logger.error("Error updating image paths", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1289 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1290 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1291 if SPLIT_COLUMN_NAME in df.columns: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1292 df, split_config, split_info = self._process_fixed_split(df) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1293 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1294 logger.info("No split column; using random split") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1295 split_config = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1296 "type": "random", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1297 "probabilities": self.args.split_probabilities, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1298 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1299 split_info = ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1300 f"No split column in CSV. Used random split: " |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1301 f"{[int(p * 100) for p in self.args.split_probabilities]}% " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1302 f"for train/val/test." |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1303 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1304 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1305 final_csv = TEMP_CSV_FILENAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1306 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1307 df.to_csv(final_csv, index=False) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1308 logger.info(f"Saved prepared data to {final_csv}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1309 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1310 logger.error("Error saving prepared CSV", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1311 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1312 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1313 return final_csv, split_config, split_info |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1314 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1315 def _process_fixed_split(self, df: pd.DataFrame) -> Dict[str, Any]: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1316 """Process a fixed split column (0=train,1=val,2=test).""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1317 logger.info(f"Fixed split column '{SPLIT_COLUMN_NAME}' detected.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1318 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1319 col = df[SPLIT_COLUMN_NAME] |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1320 df[SPLIT_COLUMN_NAME] = pd.to_numeric(col, errors="coerce").astype( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1321 pd.Int64Dtype() |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1322 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1323 if df[SPLIT_COLUMN_NAME].isna().any(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1324 logger.warning("Split column contains non-numeric/missing values.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1325 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1326 unique = set(df[SPLIT_COLUMN_NAME].dropna().unique()) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1327 logger.info(f"Unique split values: {unique}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1328 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1329 if unique == {0, 2}: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1330 df = split_data_0_2( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1331 df, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1332 SPLIT_COLUMN_NAME, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1333 validation_size=self.args.validation_size, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1334 label_column=LABEL_COLUMN_NAME, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1335 random_state=self.args.random_seed, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1336 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1337 split_info = ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1338 "Detected a split column (with values 0 and 2) in the input CSV. " |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1339 f"Used this column as a base and reassigned " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1340 f"{self.args.validation_size * 100:.1f}% " |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1341 "of the training set (originally labeled 0) to validation (labeled 1)." |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1342 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1343 logger.info("Applied custom 0/2 split.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1344 elif unique.issubset({0, 1, 2}): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1345 split_info = "Used user-defined split column from CSV." |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1346 logger.info("Using fixed split as-is.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1347 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1348 raise ValueError(f"Unexpected split values: {unique}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1349 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1350 return df, {"type": "fixed", "column": SPLIT_COLUMN_NAME}, split_info |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1351 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1352 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1353 logger.error("Error processing fixed split", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1354 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1355 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1356 def _cleanup_temp_dirs(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1357 if self.temp_dir and self.temp_dir.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1358 logger.info(f"Cleaning up temp directory: {self.temp_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1359 shutil.rmtree(self.temp_dir, ignore_errors=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1360 self.temp_dir = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1361 self.image_extract_dir = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1362 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1363 def run(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1364 """Execute the full workflow end-to-end.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1365 logger.info("Starting workflow...") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1366 self.args.output_dir.mkdir(parents=True, exist_ok=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1367 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1368 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1369 self._create_temp_dirs() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1370 self._extract_images() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1371 csv_path, split_cfg, split_info = self._prepare_data() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1372 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1373 use_pretrained = self.args.use_pretrained or self.args.fine_tune |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1374 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1375 backend_args = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1376 "model_name": self.args.model_name, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1377 "fine_tune": self.args.fine_tune, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1378 "use_pretrained": use_pretrained, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1379 "epochs": self.args.epochs, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1380 "batch_size": self.args.batch_size, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1381 "preprocessing_num_processes": self.args.preprocessing_num_processes, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1382 "split_probabilities": self.args.split_probabilities, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1383 "learning_rate": self.args.learning_rate, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1384 "random_seed": self.args.random_seed, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1385 "early_stop": self.args.early_stop, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1386 "label_column_data_path": csv_path, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1387 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1388 yaml_str = self.backend.prepare_config(backend_args, split_cfg) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1389 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1390 config_file = self.temp_dir / TEMP_CONFIG_FILENAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1391 config_file.write_text(yaml_str) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1392 logger.info(f"Wrote backend config: {config_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1393 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1394 self.backend.run_experiment( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1395 csv_path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1396 config_file, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1397 self.args.output_dir, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1398 self.args.random_seed, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1399 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1400 logger.info("Workflow completed successfully.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1401 self.backend.generate_plots(self.args.output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1402 report_file = self.backend.generate_html_report( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1403 "Image Classification Results", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1404 self.args.output_dir, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1405 backend_args, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1406 split_info, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1407 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1408 logger.info(f"HTML report generated at: {report_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1409 self.backend.convert_parquet_to_csv(self.args.output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1410 logger.info("Converted Parquet to CSV.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1411 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1412 logger.error("Workflow execution failed", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1413 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1414 finally: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1415 self._cleanup_temp_dirs() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1416 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1417 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1418 def parse_learning_rate(s): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1419 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1420 return float(s) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1421 except (TypeError, ValueError): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1422 return None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1423 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1424 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1425 class SplitProbAction(argparse.Action): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1426 def __call__(self, parser, namespace, values, option_string=None): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1427 train, val, test = values |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1428 total = train + val + test |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1429 if abs(total - 1.0) > 1e-6: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1430 parser.error( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1431 f"--split-probabilities must sum to 1.0; " |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1432 f"got {train:.3f} + {val:.3f} + {test:.3f} = {total:.3f}" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1433 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1434 setattr(namespace, self.dest, values) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1435 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1436 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1437 def main(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1438 parser = argparse.ArgumentParser( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1439 description="Image Classification Learner with Pluggable Backends", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1440 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1441 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1442 "--csv-file", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1443 required=True, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1444 type=Path, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1445 help="Path to the input CSV", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1446 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1447 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1448 "--image-zip", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1449 required=True, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1450 type=Path, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1451 help="Path to the images ZIP", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1452 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1453 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1454 "--model-name", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1455 required=True, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1456 choices=MODEL_ENCODER_TEMPLATES.keys(), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1457 help="Which model template to use", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1458 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1459 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1460 "--use-pretrained", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1461 action="store_true", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1462 help="Use pretrained weights for the model", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1463 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1464 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1465 "--fine-tune", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1466 action="store_true", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1467 help="Enable fine-tuning", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1468 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1469 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1470 "--epochs", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1471 type=int, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1472 default=10, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1473 help="Number of training epochs", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1474 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1475 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1476 "--early-stop", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1477 type=int, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1478 default=5, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1479 help="Early stopping patience", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1480 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1481 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1482 "--batch-size", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1483 type=int, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1484 help="Batch size (None = auto)", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1485 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1486 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1487 "--output-dir", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1488 type=Path, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1489 default=Path("learner_output"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1490 help="Where to write outputs", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1491 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1492 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1493 "--validation-size", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1494 type=float, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1495 default=0.15, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1496 help="Fraction for validation (0.0–1.0)", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1497 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1498 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1499 "--preprocessing-num-processes", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1500 type=int, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1501 default=max(1, os.cpu_count() // 2), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1502 help="CPU processes for data prep", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1503 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1504 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1505 "--split-probabilities", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1506 type=float, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1507 nargs=3, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1508 metavar=("train", "val", "test"), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1509 action=SplitProbAction, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1510 default=[0.7, 0.1, 0.2], |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1511 help="Random split proportions (e.g., 0.7 0.1 0.2). Only used if no split column.", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1512 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1513 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1514 "--random-seed", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1515 type=int, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1516 default=42, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1517 help="Random seed used for dataset splitting (default: 42)", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1518 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1519 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1520 "--learning-rate", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1521 type=parse_learning_rate, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1522 default=None, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1523 help="Learning rate. If not provided, Ludwig will auto-select it.", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1524 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1525 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1526 args = parser.parse_args() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1527 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1528 if not 0.0 <= args.validation_size <= 1.0: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1529 parser.error("validation-size must be between 0.0 and 1.0") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1530 if not args.csv_file.is_file(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1531 parser.error(f"CSV not found: {args.csv_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1532 if not args.image_zip.is_file(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1533 parser.error(f"ZIP not found: {args.image_zip}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1534 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1535 backend_instance = LudwigDirectBackend() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1536 orchestrator = WorkflowOrchestrator(args, backend_instance) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1537 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1538 exit_code = 0 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1539 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1540 orchestrator.run() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1541 logger.info("Main script finished successfully.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1542 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1543 logger.error(f"Main script failed.{e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1544 exit_code = 1 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1545 finally: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1546 sys.exit(exit_code) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1547 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1548 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1549 if __name__ == "__main__": |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1550 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1551 import ludwig |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1552 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1553 logger.debug(f"Found Ludwig version: {ludwig.globals.LUDWIG_VERSION}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1554 except ImportError: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1555 logger.error( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1556 "Ludwig library not found. Please ensure Ludwig is installed " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1557 "('pip install ludwig[image]')" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1558 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1559 sys.exit(1) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1560 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1561 main() |