annotate ludwig_experiment.py @ 2:65c50cd5aa13 draft default tip

planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit 008bc5a8ee2c80f70fc224a23b5d2cd0c8ef1810
author goeckslab
date Fri, 14 Mar 2025 16:49:06 +0000
parents d5d69c29d537
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
1 import json
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
2 import logging
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
3 import os
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
4 import pickle
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
5 import sys
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
6
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
7 from ludwig.experiment import cli
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
8 from ludwig.globals import (
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
9 DESCRIPTION_FILE_NAME,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
10 PREDICTIONS_PARQUET_FILE_NAME,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
11 TEST_STATISTICS_FILE_NAME,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
12 TRAIN_SET_METADATA_FILE_NAME
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
13 )
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
14 from ludwig.utils.data_utils import get_split_path
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
15 from ludwig.visualize import get_visualizations_registry
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
16
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
17 from model_unpickler import SafeUnpickler
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
18
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
19 import pandas as pd
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
20
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
21 from utils import (
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
22 encode_image_to_base64,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
23 get_html_closing,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
24 get_html_template
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
25 )
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
26
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
27
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
28 logging.basicConfig(level=logging.DEBUG)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
29
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
30 LOG = logging.getLogger(__name__)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
31
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
32 setattr(pickle, 'Unpickler', SafeUnpickler)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
33
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
34 # visualization
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
35 output_directory = None
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
36 for ix, arg in enumerate(sys.argv):
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
37 if arg == "--output_directory":
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
38 output_directory = sys.argv[ix+1]
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
39 break
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
40
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
41 viz_output_directory = os.path.join(output_directory, "visualizations")
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
42
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
43
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
44 def get_output_feature_name(experiment_dir, output_feature=0):
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
45 """Helper function to extract specified output feature name.
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
46
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
47 :param experiment_dir: Path to the experiment directory
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
48 :param output_feature: position of the output feature the description.json
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
49 :return output_feature_name: name of the first output feature name
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
50 from the experiment
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
51 """
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
52 if os.path.exists(os.path.join(experiment_dir, DESCRIPTION_FILE_NAME)):
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
53 description_file = os.path.join(experiment_dir, DESCRIPTION_FILE_NAME)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
54 with open(description_file, "rb") as f:
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
55 content = json.load(f)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
56 output_feature_name = \
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
57 content["config"]["output_features"][output_feature]["name"]
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
58 dataset_path = content["dataset"]
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
59 return output_feature_name, dataset_path
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
60 return None, None
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
61
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
62
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
63 def check_file(file_path):
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
64 """Check if the file exists; return None if it doesn't."""
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
65 return file_path if os.path.exists(file_path) else None
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
66
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
67
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
68 def make_visualizations(ludwig_output_directory_name):
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
69 ludwig_output_directory = os.path.join(
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
70 output_directory,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
71 ludwig_output_directory_name,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
72 )
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
73 visualizations = [
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
74 "confidence_thresholding",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
75 "confidence_thresholding_data_vs_acc",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
76 "confidence_thresholding_data_vs_acc_subset",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
77 "confidence_thresholding_data_vs_acc_subset_per_class",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
78 "confidence_thresholding_2thresholds_2d",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
79 "confidence_thresholding_2thresholds_3d",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
80 "binary_threshold_vs_metric",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
81 "roc_curves",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
82 "roc_curves_from_test_statistics",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
83 "calibration_1_vs_all",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
84 "calibration_multiclass",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
85 "confusion_matrix",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
86 "frequency_vs_f1",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
87 "learning_curves",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
88 ]
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
89
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
90 # Check existence of required files
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
91 training_statistics = check_file(os.path.join(
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
92 ludwig_output_directory,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
93 "training_statistics.json",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
94 ))
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
95 test_statistics = check_file(os.path.join(
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
96 ludwig_output_directory,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
97 TEST_STATISTICS_FILE_NAME,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
98 ))
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
99 ground_truth_metadata = check_file(os.path.join(
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
100 ludwig_output_directory,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
101 "model",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
102 TRAIN_SET_METADATA_FILE_NAME,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
103 ))
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
104 probabilities = check_file(os.path.join(
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
105 ludwig_output_directory,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
106 PREDICTIONS_PARQUET_FILE_NAME,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
107 ))
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
108
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
109 output_feature, dataset_path = get_output_feature_name(
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
110 ludwig_output_directory)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
111 ground_truth = None
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
112 split_file = None
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
113 if dataset_path:
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
114 ground_truth = check_file(dataset_path)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
115 split_file = check_file(get_split_path(dataset_path))
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
116
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
117 if (not output_feature) and (test_statistics):
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
118 test_stat = os.path.join(test_statistics)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
119 with open(test_stat, "rb") as f:
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
120 content = json.load(f)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
121 output_feature = next(iter(content.keys()))
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
122
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
123 for viz in visualizations:
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
124 viz_func = get_visualizations_registry()[viz]
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
125 try:
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
126 viz_func(
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
127 training_statistics=[training_statistics]
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
128 if training_statistics else [],
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
129 test_statistics=[test_statistics] if test_statistics else [],
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
130 probabilities=[probabilities] if probabilities else [],
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
131 top_n_classes=[0],
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
132 output_feature_name=output_feature if output_feature else "",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
133 ground_truth_split=2,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
134 top_k=3,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
135 ground_truth_metadata=ground_truth_metadata,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
136 ground_truth=ground_truth,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
137 split_file=split_file,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
138 output_directory=viz_output_directory,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
139 normalize=False,
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
140 file_format="png",
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
141 )
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
142 except Exception as e:
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
143 LOG.info(f"Visualization: {viz}")
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
144 LOG.info(f"Error: {e}")
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
145
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
146
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
147 def convert_parquet_to_csv(ludwig_output_directory_name):
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
148 """Convert the predictions Parquet file to CSV."""
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
149 ludwig_output_directory = os.path.join(
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
150 output_directory, ludwig_output_directory_name)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
151 parquet_path = os.path.join(
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
152 ludwig_output_directory, "predictions.parquet")
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
153 csv_path = os.path.join(
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
154 ludwig_output_directory, "predictions_parquet.csv")
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
155
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
156 try:
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
157 df = pd.read_parquet(parquet_path)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
158 df.to_csv(csv_path, index=False)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
159 LOG.info(f"Converted Parquet to CSV: {csv_path}")
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
160 except Exception as e:
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
161 LOG.error(f"Error converting Parquet to CSV: {e}")
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
162
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
163
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
164 def generate_html_report(title, ludwig_output_directory_name):
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
165 # ludwig_output_directory = os.path.join(
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
166 # output_directory, ludwig_output_directory_name)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
167
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
168 # test_statistics_html = ""
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
169 # # Read test statistics JSON and convert to HTML table
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
170 # try:
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
171 # test_statistics_path = os.path.join(
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
172 # ludwig_output_directory, TEST_STATISTICS_FILE_NAME)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
173 # with open(test_statistics_path, "r") as f:
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
174 # test_statistics = json.load(f)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
175 # test_statistics_html = "<h2>Test Statistics</h2>"
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
176 # test_statistics_html += json_to_html_table(
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
177 # test_statistics)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
178 # except Exception as e:
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
179 # LOG.info(f"Error reading test statistics: {e}")
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
180
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
181 # Convert visualizations to HTML
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
182 plots_html = ""
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
183 if len(os.listdir(viz_output_directory)) > 0:
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
184 plots_html = "<h2>Visualizations</h2>"
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
185 for plot_file in sorted(os.listdir(viz_output_directory)):
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
186 plot_path = os.path.join(viz_output_directory, plot_file)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
187 if os.path.isfile(plot_path) and plot_file.endswith((".png", ".jpg")):
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
188 encoded_image = encode_image_to_base64(plot_path)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
189 plots_html += (
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
190 f'<div class="plot">'
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
191 f'<h3>{os.path.splitext(plot_file)[0]}</h3>'
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
192 '<img src="data:image/png;base64,'
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
193 f'{encoded_image}" alt="{plot_file}">'
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
194 f'</div>'
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
195 )
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
196
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
197 # Generate the full HTML content
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
198 html_content = f"""
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
199 {get_html_template()}
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
200 <h1>{title}</h1>
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
201 {plots_html}
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
202 {get_html_closing()}
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
203 """
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
204
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
205 # Save the HTML report
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
206 title: str
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
207 report_name = title.lower().replace(" ", "_")
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
208 report_path = os.path.join(output_directory, f"{report_name}_report.html")
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
209 with open(report_path, "w") as report_file:
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
210 report_file.write(html_content)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
211
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
212 LOG.info(f"HTML report generated at: {report_path}")
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
213
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
214
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
215 if __name__ == "__main__":
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
216
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
217 cli(sys.argv[1:])
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
218
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
219 ludwig_output_directory_name = "experiment_run"
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
220
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
221 make_visualizations(ludwig_output_directory_name)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
222 convert_parquet_to_csv(ludwig_output_directory_name)
ed8a9ea5bc73 planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff changeset
223 generate_html_report("Ludwig Experiment", ludwig_output_directory_name)