annotate ludwig_experiment.py @ 7:b7ed0e483e4d draft default tip

planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
author goeckslab
date Sat, 22 Nov 2025 01:16:12 +0000
parents 36c5fcc49286
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
1 import base64
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
2 import html
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
3 import json
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
4 import logging
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
5 import os
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
6 import pickle
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
7 import re
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
8 import sys
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
9 from io import BytesIO
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
10
4
36c5fcc49286 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit 4dc221b2fa9717552787f0985ad3fc3df4460158
goeckslab
parents: 1
diff changeset
11 import pandas as pd
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
12 from ludwig.api import LudwigModel
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
13 from ludwig.experiment import cli
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
14 from ludwig.globals import (
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
15 DESCRIPTION_FILE_NAME,
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
16 PREDICTIONS_PARQUET_FILE_NAME,
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
17 TEST_STATISTICS_FILE_NAME,
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
18 TRAIN_SET_METADATA_FILE_NAME
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
19 )
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
20 from ludwig.utils.data_utils import get_split_path
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
21 from ludwig.visualize import get_visualizations_registry
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
22 from model_unpickler import SafeUnpickler
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
23 from utils import (
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
24 encode_image_to_base64,
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
25 get_html_closing,
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
26 get_html_template
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
27 )
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
28
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
29 try: # pragma: no cover - optional dependency in runtime containers
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
30 import matplotlib.pyplot as plt
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
31 except ImportError: # pragma: no cover
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
32 plt = None
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
33
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
34
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
logging.basicConfig(level=logging.DEBUG)

LOG = logging.getLogger(__name__)

# Replace pickle.Unpickler with the project's SafeUnpickler.
setattr(pickle, 'Unpickler', SafeUnpickler)

# visualization
# The output directory is picked out of sys.argv by hand because the helper
# functions in this module build their paths from it. Both the
# "--output_directory PATH" and "--output_directory=PATH" spellings are
# accepted; the first occurrence wins.
output_directory = None
for ix, arg in enumerate(sys.argv):
    if arg == "--output_directory":
        # Guard against the flag being the last token on the command line.
        if ix + 1 < len(sys.argv):
            output_directory = sys.argv[ix + 1]
        break
    if arg.startswith("--output_directory="):
        output_directory = arg.split("=", 1)[1] or None
        break

if output_directory is None:
    # NOTE(review): "results" is assumed to match the Ludwig experiment CLI
    # default for --output_directory -- confirm against the Ludwig version
    # in use. Without a fallback, os.path.join(None, ...) below raises
    # TypeError as soon as the module is loaded.
    output_directory = "results"
    LOG.warning(
        "--output_directory not found on the command line; "
        "falling back to %r", output_directory)

viz_output_directory = os.path.join(output_directory, "visualizations")
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
49
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
50
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
def get_output_feature_name(experiment_dir, output_feature=0):
    """Read an output feature name and the dataset path of an experiment.

    Both values come from the description file (``DESCRIPTION_FILE_NAME``)
    located directly inside ``experiment_dir``.

    :param experiment_dir: Path to the experiment directory
    :param output_feature: position of the output feature in the
        ``config -> output_features`` list of the description file
    :return: tuple ``(output_feature_name, dataset_path)``. Both items are
        ``None`` when the description file does not exist;
        ``dataset_path`` alone is ``None`` when the description has no
        ``"dataset"`` entry.
    :raises KeyError: if the description lacks ``config``,
        ``output_features`` or the feature's ``name``
    :raises IndexError: if ``output_feature`` is out of range
    """
    description_file = os.path.join(experiment_dir, DESCRIPTION_FILE_NAME)
    if not os.path.exists(description_file):
        return None, None

    with open(description_file, "rb") as f:
        content = json.load(f)

    output_feature_name = \
        content["config"]["output_features"][output_feature]["name"]
    # A description without a "dataset" entry should not abort the lookup;
    # report the dataset as unknown instead of raising KeyError.
    dataset_path = content.get("dataset")
    return output_feature_name, dataset_path
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
68
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
69
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
def check_file(file_path):
    """Return ``file_path`` if it exists on disk, otherwise ``None``.

    :param file_path: path to test; ``None`` is accepted and yields ``None``
    :return: the unchanged ``file_path`` when ``os.path.exists`` reports it
        present, else ``None``
    """
    # os.path.exists(None) raises TypeError, so treat a missing path as a
    # missing file up front.
    if file_path is None:
        return None
    return file_path if os.path.exists(file_path) else None
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
73
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
74
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
def make_visualizations(ludwig_output_directory_name):
    """Run a fixed list of Ludwig visualizations for one experiment run.

    The run's artifacts are looked up under
    ``output_directory/ludwig_output_directory_name``; any artifact that is
    missing is passed to the visualization functions as an empty list or
    ``None``. Every visualization is attempted independently: a failure
    (including a name missing from the registry) is logged and the loop
    moves on to the next one.

    :param ludwig_output_directory_name: name of the run directory inside
        the module-level ``output_directory``
    """
    ludwig_output_directory = os.path.join(
        output_directory,
        ludwig_output_directory_name,
    )
    visualizations = [
        "confidence_thresholding",
        "confidence_thresholding_data_vs_acc",
        "confidence_thresholding_data_vs_acc_subset",
        "confidence_thresholding_data_vs_acc_subset_per_class",
        "confidence_thresholding_2thresholds_2d",
        "confidence_thresholding_2thresholds_3d",
        "binary_threshold_vs_metric",
        "roc_curves",
        "roc_curves_from_test_statistics",
        "calibration_1_vs_all",
        "calibration_multiclass",
        "confusion_matrix",
        "frequency_vs_f1",
        "learning_curves",
    ]

    # Check existence of required files
    training_statistics = check_file(os.path.join(
        ludwig_output_directory,
        "training_statistics.json",
    ))
    test_statistics = check_file(os.path.join(
        ludwig_output_directory,
        TEST_STATISTICS_FILE_NAME,
    ))
    ground_truth_metadata = check_file(os.path.join(
        ludwig_output_directory,
        "model",
        TRAIN_SET_METADATA_FILE_NAME,
    ))
    probabilities = check_file(os.path.join(
        ludwig_output_directory,
        PREDICTIONS_PARQUET_FILE_NAME,
    ))

    output_feature, dataset_path = get_output_feature_name(
        ludwig_output_directory)
    ground_truth = None
    split_file = None
    if dataset_path:
        ground_truth = check_file(dataset_path)
        split_file = check_file(get_split_path(dataset_path))

    # Fall back to the first key of the test statistics when the description
    # file did not provide an output feature name.
    if (not output_feature) and test_statistics:
        with open(test_statistics, "rb") as f:
            content = json.load(f)
        # An empty statistics file leaves the name unset instead of raising
        # StopIteration.
        output_feature = next(iter(content), None)

    # The registry does not depend on the loop variable; fetch it once.
    registry = get_visualizations_registry()

    for viz in visualizations:
        try:
            # Looked up inside the try so that a name the registry does not
            # know is logged and skipped like any other failure.
            viz_func = registry[viz]
            viz_func(
                training_statistics=[training_statistics]
                if training_statistics else [],
                test_statistics=[test_statistics] if test_statistics else [],
                probabilities=[probabilities] if probabilities else [],
                top_n_classes=[0],
                output_feature_name=output_feature if output_feature else "",
                ground_truth_split=2,
                top_k=3,
                ground_truth_metadata=ground_truth_metadata,
                ground_truth=ground_truth,
                split_file=split_file,
                output_directory=viz_output_directory,
                normalize=False,
                file_format="png",
            )
        except Exception as e:
            # Best effort: a failing visualization is reported and skipped.
            LOG.info(f"Visualization: {viz}")
            LOG.info(f"Error: {e}")
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
152
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
153
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
def convert_parquet_to_csv(ludwig_output_directory_name):
    """Write a CSV copy of the run's ``predictions.parquet`` file.

    The run directory is ``output_directory`` (a name resolved outside this
    function) joined with *ludwig_output_directory_name*. The CSV is written
    next to the Parquet file as ``predictions_parquet.csv``, without the
    DataFrame index.

    This is best-effort: any failure while reading or writing is logged at
    error level and swallowed, so the function always returns ``None``.
    """
    run_dir = os.path.join(output_directory, ludwig_output_directory_name)
    source = os.path.join(run_dir, "predictions.parquet")
    target = os.path.join(run_dir, "predictions_parquet.csv")

    try:
        predictions = pd.read_parquet(source)
        predictions.to_csv(target, index=False)
        LOG.info(f"Converted Parquet to CSV: {target}")
    except Exception as err:
        # Deliberately broad: a failed conversion must not abort the caller.
        LOG.error(f"Error converting Parquet to CSV: {err}")
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
169
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
170
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
def _resolve_dataset_path(dataset_path):
    """Return the absolute path of the first existing location for a dataset.

    Locations are tried in this order:

    1. *dataset_path* exactly as given;
    2. *dataset_path* relative to ``output_directory`` (relative paths only);
    3. *dataset_path* relative to the current working directory (relative
       paths only).

    Returns ``None`` when *dataset_path* is empty/``None`` or when none of the
    locations exists.
    """
    if not dataset_path:
        return None

    search_order = [dataset_path]
    if not os.path.isabs(dataset_path):
        search_order.append(os.path.join(output_directory, dataset_path))
        search_order.append(os.path.join(os.getcwd(), dataset_path))

    existing = (
        location
        for location in search_order
        if location and os.path.exists(location)
    )
    first_hit = next(existing, None)
    if first_hit is None:
        return None
    return os.path.abspath(first_hit)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
188
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
189
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
def _load_dataset_dataframe(dataset_path):
    """Load a dataset file into a pandas DataFrame, chosen by file extension.

    The extension is matched case-insensitively. Supported extensions are
    ``.csv`` (comma-separated), ``.tsv`` (tab-separated), ``.parquet``,
    ``.json`` and ``.h5``.

    Returns the loaded DataFrame, or ``None`` when *dataset_path* is
    empty/``None``, the extension is not supported, or the reader raises.
    Problems are reported as warnings and never raised, so callers can treat
    a missing dataset as "skip feature importance".
    """
    if not dataset_path:
        return None

    _, ext = os.path.splitext(dataset_path.lower())

    readers = {
        ".csv": lambda path: pd.read_csv(path, sep=","),
        ".tsv": lambda path: pd.read_csv(path, sep="\t"),
        ".parquet": pd.read_parquet,
        ".json": pd.read_json,
        ".h5": pd.read_hdf,
    }
    reader = readers.get(ext)
    if reader is None:
        LOG.warning("Unsupported dataset format for feature importance computation")
        return None

    try:
        return reader(dataset_path)
    except Exception as exc:
        # Return here so a failed read of a *supported* format is not also
        # reported as an unsupported format.
        LOG.warning(f"Unable to load dataset '{dataset_path}': {exc}")
        return None
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
211
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
212
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
def sanitize_feature_name(name):
    """Return *name* as a string with special characters replaced by ``_``.

    Each of the characters ``( ) { } . : " ' [ ]`` becomes one underscore.
    *name* is converted with ``str()`` first, so non-string column labels are
    accepted. Intended to mirror Ludwig's ``get_sanitized_feature_name``.
    """
    text = str(name)
    return re.sub(r"[(){}.:\"\"\'\'\[\]]", "_", text)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
216
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
217
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
def _sanitize_dataframe_columns(dataframe):
    """Return a copy of *dataframe* whose columns use sanitized names.

    Every column label is passed through ``sanitize_feature_name``. The input
    frame is not modified; ``DataFrame.rename`` returns a new frame.

    If two different labels sanitize to the same name, a warning is logged
    and the renamed frame is still returned.
    """
    renames = {}
    for original in dataframe.columns:
        renames[original] = sanitize_feature_name(original)

    renamed = dataframe.rename(columns=renames)

    new_names = list(renames.values())
    if len(new_names) != len(set(new_names)):
        LOG.warning(
            "Column name collision after sanitization; feature importance may be unreliable"
        )

    return renamed
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
229
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
230
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
def _feature_importance_plot(label_df, label_name, top_n=10, max_abs_importance=None):
    """
    Return base64-encoded bar plot for a label's top-N feature importances.

    label_df must provide "feature" and "abs_importance" columns; the top_n
    rows with the largest "abs_importance" are drawn as horizontal bars with
    the largest at the top.

    label_name is accepted for interface compatibility but is not used by
    the plot.

    max_abs_importance lets us pin the x-axis across labels so readers can
    compare magnitudes. When it is falsy or not positive the axis is left to
    autoscale.

    Returns the PNG image as a base64 string, or "" when the module-level
    `plt` is None or there is nothing to plot.
    """
    if plt is None or label_df.empty:
        return ""

    top_features = label_df.nlargest(top_n, "abs_importance")
    if top_features.empty:
        return ""

    # Grow the figure with the number of bars so labels stay readable.
    fig, ax = plt.subplots(figsize=(6, 3 + 0.2 * len(top_features)))
    try:
        ax.barh(top_features["feature"], top_features["abs_importance"], color="#3f8fd2")
        ax.set_xlabel("|importance|")
        if max_abs_importance and max_abs_importance > 0:
            # 5% headroom so the longest bar does not touch the frame.
            ax.set_xlim(0, max_abs_importance * 1.05)
        ax.invert_yaxis()
        fig.tight_layout()

        buf = BytesIO()
        fig.savefig(buf, format="png", dpi=150)
    finally:
        # Always release the figure: pyplot keeps figures alive until they
        # are closed, so an exception above would otherwise leak it.
        plt.close(fig)

    return base64.b64encode(buf.getvalue()).decode("utf-8")
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
258
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
259
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
def render_feature_importance_table(df: pd.DataFrame) -> str:
    """Build the HTML table markup for a feature-importance DataFrame.

    One ``<th class='sortable'>`` is emitted per column, with underscores in
    the column name shown as spaces, and one ``<tr>`` per row. ``float``
    values are formatted with six decimals; every other value is rendered
    with ``str()``. All header and cell text is HTML-escaped.

    This function only emits the markup and CSS class names; any sorting
    behaviour must come from elsewhere.

    Returns an empty string when *df* is empty.
    """
    if df.empty:
        return ""

    def _cell(value):
        # Fixed precision for floats; plain string form for anything else.
        text = f"{value:.6f}" if isinstance(value, float) else str(value)
        return f"<td>{html.escape(text)}</td>"

    column_names = list(df.columns)

    header_html = "".join(
        f"<th class='sortable'>{html.escape(str(name).replace('_', ' '))}</th>"
        for name in column_names
    )
    rows_html = "".join(
        "<tr>" + "".join(_cell(record[name]) for name in column_names) + "</tr>"
        for _, record in df.iterrows()
    )

    return (
        "<div class='scroll-rows-30'>"
        "<table class='feature-importance-table sortable-table'>"
        f"<thead><tr>{header_html}</tr></thead>"
        f"<tbody>{rows_html}</tbody>"
        "</table>"
        "</div>"
    )
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
291
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
292
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
293 def compute_feature_importance(ludwig_output_directory_name,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
294 sample_size=200,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
295 random_seed=42):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
296 ludwig_output_directory = os.path.join(
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
297 output_directory, ludwig_output_directory_name)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
298 model_dir = os.path.join(ludwig_output_directory, "model")
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
299
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
300 output_csv_path = os.path.join(
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
301 ludwig_output_directory, "feature_importance.csv")
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
302
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
303 if not os.path.exists(model_dir):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
304 LOG.info("Model directory not found; skipping feature importance computation")
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
305 return
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
306
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
307 try:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
308 ludwig_model = LudwigModel.load(model_dir)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
309 except Exception as exc:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
310 LOG.warning(f"Unable to load Ludwig model for explanations: {exc}")
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
311 return
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
312
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
313 training_metadata = getattr(ludwig_model, "training_set_metadata", {})
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
314
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
315 output_feature_name, dataset_path = get_output_feature_name(
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
316 ludwig_output_directory)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
317
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
318 if not output_feature_name or not dataset_path:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
319 LOG.warning("Output feature or dataset path missing; skipping feature importance")
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
320 if hasattr(ludwig_model, "close"):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
321 ludwig_model.close()
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
322 return
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
323
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
324 dataset_full_path = _resolve_dataset_path(dataset_path)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
325 if not dataset_full_path:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
326 LOG.warning(f"Unable to resolve dataset path '{dataset_path}' for explanations")
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
327 if hasattr(ludwig_model, "close"):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
328 ludwig_model.close()
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
329 return
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
330
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
331 dataframe = _load_dataset_dataframe(dataset_full_path)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
332 if dataframe is None or dataframe.empty:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
333 LOG.warning("Dataset unavailable or empty; skipping feature importance")
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
334 if hasattr(ludwig_model, "close"):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
335 ludwig_model.close()
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
336 return
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
337
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
338 dataframe = _sanitize_dataframe_columns(dataframe)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
339
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
340 data_subset = dataframe if len(dataframe) <= sample_size else dataframe.head(sample_size)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
341 sample_df = dataframe.sample(
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
342 n=min(sample_size, len(dataframe)),
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
343 random_state=random_seed,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
344 replace=False,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
345 ) if len(dataframe) > sample_size else dataframe
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
346
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
347 try:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
348 from ludwig.explain.captum import IntegratedGradientsExplainer
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
349 except ImportError as exc:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
350 LOG.warning(f"Integrated Gradients explainer unavailable: {exc}")
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
351 if hasattr(ludwig_model, "close"):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
352 ludwig_model.close()
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
353 return
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
354
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
355 sanitized_output_feature = sanitize_feature_name(output_feature_name)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
356
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
357 try:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
358 explainer = IntegratedGradientsExplainer(
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
359 ludwig_model,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
360 data_subset,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
361 sample_df,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
362 sanitized_output_feature,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
363 )
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
364 explanations = explainer.explain()
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
365 except Exception as exc:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
366 LOG.warning(f"Unable to compute feature importance: {exc}")
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
367 if hasattr(ludwig_model, "close"):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
368 ludwig_model.close()
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
369 return
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
370
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
371 if hasattr(ludwig_model, "close"):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
372 try:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
373 ludwig_model.close()
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
374 except Exception:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
375 pass
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
376
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
377 label_names = []
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
378 target_metadata = {}
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
379 if isinstance(training_metadata, dict):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
380 target_metadata = training_metadata.get(sanitized_output_feature, {})
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
381
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
382 if isinstance(target_metadata, dict):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
383 if "idx2str" in target_metadata:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
384 idx2str = target_metadata["idx2str"]
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
385 if isinstance(idx2str, dict):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
386 def _idx_key(item):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
387 idx_key = item[0]
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
388 try:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
389 return (0, int(idx_key))
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
390 except (TypeError, ValueError):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
391 return (1, str(idx_key))
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
392
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
393 label_names = [value for key, value in sorted(
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
394 idx2str.items(), key=_idx_key)]
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
395 else:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
396 label_names = idx2str
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
397 elif "str2idx" in target_metadata and isinstance(
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
398 target_metadata["str2idx"], dict):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
399 # invert mapping
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
400 label_names = [label for label, _ in sorted(
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
401 target_metadata["str2idx"].items(),
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
402 key=lambda item: item[1])]
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
403
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
404 rows = []
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
405 global_explanation = explanations.global_explanation
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
406 for label_index, label_explanation in enumerate(
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
407 global_explanation.label_explanations):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
408 if label_names and label_index < len(label_names):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
409 label_value = str(label_names[label_index])
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
410 elif len(global_explanation.label_explanations) == 1:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
411 label_value = output_feature_name
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
412 else:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
413 label_value = str(label_index)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
414
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
415 for feature in label_explanation.feature_attributions:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
416 rows.append({
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
417 "label": label_value,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
418 "feature": feature.feature_name,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
419 "importance": feature.attribution,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
420 "abs_importance": abs(feature.attribution),
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
421 })
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
422
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
423 if not rows:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
424 LOG.warning("No feature importance rows produced")
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
425 return
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
426
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
427 importance_df = pd.DataFrame(rows)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
428 importance_df.sort_values([
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
429 "label",
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
430 "abs_importance"
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
431 ], ascending=[True, False], inplace=True)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
432
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
433 importance_df.to_csv(output_csv_path, index=False)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
434
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
435 LOG.info(f"Feature importance saved to {output_csv_path}")
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
436
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
437
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
438 def generate_html_report(title, ludwig_output_directory_name):
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
439 plots_html = ""
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
440 plot_files = []
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
441 if os.path.isdir(viz_output_directory):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
442 plot_files = sorted(os.listdir(viz_output_directory))
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
443 if plot_files:
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
444 plots_html = "<h2>Visualizations</h2>"
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
445 for plot_file in plot_files:
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
446 plot_path = os.path.join(viz_output_directory, plot_file)
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
447 if os.path.isfile(plot_path) and plot_file.endswith((".png", ".jpg")):
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
448 encoded_image = encode_image_to_base64(plot_path)
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
449 plot_title = os.path.splitext(plot_file)[0].replace("_", " ")
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
450 plots_html += (
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
451 f'<div class="plot">'
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
452 f'<h3>{plot_title}</h3>'
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
453 '<img src="data:image/png;base64,'
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
454 f'{encoded_image}" alt="{plot_file}">'
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
455 f'</div>'
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
456 )
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
457
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
458 feature_importance_html = ""
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
459 importance_path = os.path.join(
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
460 output_directory,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
461 ludwig_output_directory_name,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
462 "feature_importance.csv",
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
463 )
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
464 if os.path.exists(importance_path):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
465 try:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
466 importance_df = pd.read_csv(importance_path)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
467 if not importance_df.empty:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
468 sorted_df = (
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
469 importance_df
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
470 .sort_values(["label", "abs_importance"], ascending=[True, False])
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
471 )
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
472 top_rows = (
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
473 sorted_df
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
474 .groupby("label", as_index=False)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
475 .head(5)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
476 )
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
477 max_abs_importance = pd.to_numeric(
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
478 importance_df.get("abs_importance", pd.Series(dtype=float)),
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
479 errors="coerce",
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
480 ).max()
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
481 if pd.isna(max_abs_importance):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
482 max_abs_importance = None
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
483
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
484 plot_sections = []
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
485 for label in sorted(importance_df["label"].unique()):
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
486 encoded_plot = _feature_importance_plot(
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
487 importance_df[importance_df["label"] == label],
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
488 label,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
489 max_abs_importance=max_abs_importance,
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
490 )
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
491 if encoded_plot:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
492 plot_sections.append(
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
493 f'<div class="plot feature-importance-plot">'
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
494 f'<h3>Top features for {label}</h3>'
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
495 f'<img src="data:image/png;base64,{encoded_plot}" '
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
496 f'alt="Feature importance plot for {label}">'
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
497 f'</div>'
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
498 )
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
499 explanation_text = (
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
500 "<p>Feature importance scores come from Ludwig's Integrated Gradients explainer. "
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
501 "It interpolates between each example and a neutral baseline sample, summing "
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
502 "the change in the model output along that path. Higher |importance| values "
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
503 "indicate stronger influence. Plots share a common x-axis to make magnitudes "
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
504 "comparable across labels, and the table columns can be sorted for quick scans.</p>"
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
505 )
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
506 feature_importance_html = (
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
507 "<h2>Feature Importance</h2>"
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
508 + explanation_text
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
509 + render_feature_importance_table(top_rows)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
510 + "".join(plot_sections)
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
511 )
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
512 except Exception as exc:
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
513 LOG.info(f"Unable to embed feature importance table: {exc}")
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
514
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
515 # Generate the full HTML content
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
516 feature_section = feature_importance_html or "<p>No feature importance artifacts were generated.</p>"
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
517 viz_section = plots_html or "<p>No visualizations were generated.</p>"
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
518 tabs_style = """
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
519 <style>
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
520 .tabs {
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
521 display: flex;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
522 border-bottom: 2px solid #ccc;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
523 margin-top: 20px;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
524 margin-bottom: 1rem;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
525 }
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
526 .tablink {
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
527 padding: 9px 18px;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
528 cursor: pointer;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
529 border: 1px solid #ccc;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
530 border-bottom: none;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
531 background: #f9f9f9;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
532 margin-right: 5px;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
533 border-top-left-radius: 8px;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
534 border-top-right-radius: 8px;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
535 font-size: 0.95rem;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
536 font-weight: 500;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
537 font-family: Arial, sans-serif;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
538 color: #4A4A4A;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
539 }
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
540 .tablink.active {
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
541 background: #ffffff;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
542 font-weight: bold;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
543 }
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
544 .tabcontent {
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
545 border: 1px solid #ccc;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
546 border-top: none;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
547 padding: 20px;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
548 display: none;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
549 }
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
550 .tabcontent.active {
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
551 display: block;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
552 }
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
553 </style>
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
554 """
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
555 tabs_script = """
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
556 <script>
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
557 function openTab(evt, tabId) {
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
558 var i, tabcontent, tablinks;
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
559 tabcontent = document.getElementsByClassName("tabcontent");
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
560 for (i = 0; i < tabcontent.length; i++) {
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
561 tabcontent[i].style.display = "none";
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
562 tabcontent[i].classList.remove("active");
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
563 }
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
564 tablinks = document.getElementsByClassName("tablink");
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
565 for (i = 0; i < tablinks.length; i++) {
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
566 tablinks[i].classList.remove("active");
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
567 }
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
568 var current = document.getElementById(tabId);
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
569 if (current) {
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
570 current.style.display = "block";
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
571 current.classList.add("active");
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
572 }
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
573 if (evt && evt.currentTarget) {
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
574 evt.currentTarget.classList.add("active");
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
575 }
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
576 }
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
577 document.addEventListener("DOMContentLoaded", function() {
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
578 openTab({currentTarget: document.querySelector(".tablink")}, "viz-tab");
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
579 });
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
580 </script>
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
581 """
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
582 tabs_html = f"""
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
583 <div class="tabs">
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
584 <button class="tablink active" onclick="openTab(event, 'viz-tab')">Visualizations</button>
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
585 <button class="tablink" onclick="openTab(event, 'feature-tab')">Feature Importance</button>
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
586 </div>
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
587 <div id="viz-tab" class="tabcontent active">
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
588 {viz_section}
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
589 </div>
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
590 <div id="feature-tab" class="tabcontent">
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
591 {feature_section}
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
592 </div>
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
593 """
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
594 html_content = f"""
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
595 {get_html_template()}
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
596 <h1>{title}</h1>
7
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
597 {tabs_style}
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
598 {tabs_html}
b7ed0e483e4d planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit e2ab4c0f9ce8b7a0a48f749ef5dd9899d6c2b1f8
goeckslab
parents: 4
diff changeset
599 {tabs_script}
0
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
600 {get_html_closing()}
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
601 """
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
602
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
603 # Save the HTML report
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
604 title: str
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
605 report_name = title.lower().replace(" ", "_")
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
606 report_path = os.path.join(output_directory, f"{report_name}_report.html")
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
607 with open(report_path, "w") as report_file:
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
608 report_file.write(html_content)
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
609
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
610 LOG.info(f"HTML report generated at: {report_path}")
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
611
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
612
183adfc24076 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: hand the command-line arguments (without the
    # script name) to cli(). NOTE(review): presumably this runs the Ludwig
    # experiment itself -- confirm against where cli is imported.
    cli(sys.argv[1:])

    # Directory name handed to every post-processing step below.
    ludwig_output_directory_name = "experiment_run"

    # These steps each take only the output directory name; they run in this
    # fixed order before the HTML report is produced.
    for _post_step in (
        make_visualizations,
        convert_parquet_to_csv,
        compute_feature_importance,
    ):
        _post_step(ludwig_output_directory_name)

    # The report is generated last, with a fixed title.
    generate_html_report("Ludwig Experiment", ludwig_output_directory_name)