Mercurial > repos > goeckslab > ludwig_train
annotate ludwig_experiment.py @ 4:650639a4a75f draft default tip
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit 4dc221b2fa9717552787f0985ad3fc3df4460158
author | goeckslab |
---|---|
date | Sat, 21 Jun 2025 15:06:22 +0000 |
parents | 4d12452c5361 |
children |
rev | line source |
---|---|
0
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
1 import json |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
2 import logging |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
3 import os |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
4 import pickle |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
5 import sys |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
6 |
4
650639a4a75f
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit 4dc221b2fa9717552787f0985ad3fc3df4460158
goeckslab
parents:
1
diff
changeset
|
7 import pandas as pd |
0
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
8 from ludwig.experiment import cli |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
9 from ludwig.globals import ( |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
10 DESCRIPTION_FILE_NAME, |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
11 PREDICTIONS_PARQUET_FILE_NAME, |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
12 TEST_STATISTICS_FILE_NAME, |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
13 TRAIN_SET_METADATA_FILE_NAME |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
14 ) |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
15 from ludwig.utils.data_utils import get_split_path |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
16 from ludwig.visualize import get_visualizations_registry |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
17 from model_unpickler import SafeUnpickler |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
18 from utils import ( |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
19 encode_image_to_base64, |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
20 get_html_closing, |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
21 get_html_template |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
22 ) |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
23 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
24 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
# Configure the root logger so DEBUG-and-above records are emitted.
logging.basicConfig(level=logging.DEBUG)

# Module-level logger used by every function in this file.
LOG = logging.getLogger(__name__)

# Replace the pickle module's Unpickler class with the project's
# SafeUnpickler (plain assignment; equivalent to the former setattr call).
# NOTE(review): this only affects code that looks up ``pickle.Unpickler``
# after this point; whether ``pickle.load``/``pickle.loads`` callers are
# covered should be confirmed.
pickle.Unpickler = SafeUnpickler
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
30 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
# visualization
# Peek at the command line to find where the experiment results are
# written. Both "--output_directory VALUE" and "--output_directory=VALUE"
# are recognised; a trailing flag without a value is ignored instead of
# raising IndexError.
output_directory = None
for ix, arg in enumerate(sys.argv):
    if arg == "--output_directory" and ix + 1 < len(sys.argv):
        output_directory = sys.argv[ix + 1]
        break
    if arg.startswith("--output_directory="):
        output_directory = arg.split("=", 1)[1]
        break

if output_directory is None:
    # Without a fallback, os.path.join(None, ...) below raises TypeError at
    # import time.
    # NOTE(review): "results" is assumed to match the Ludwig CLI default
    # for --output_directory; confirm against the installed version.
    output_directory = "results"

# All rendered plots are written below this directory.
viz_output_directory = os.path.join(output_directory, "visualizations")
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
39 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
40 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
def get_output_feature_name(experiment_dir, output_feature=0):
    """Helper function to extract an output feature name and dataset path.

    Reads the description file (``DESCRIPTION_FILE_NAME``) stored in
    ``experiment_dir``.

    :param experiment_dir: Path to the experiment directory
    :param output_feature: position of the output feature in the
        ``config.output_features`` list of the description file
    :return: tuple ``(output_feature_name, dataset_path)``.
        ``(None, None)`` is returned when the description file does not
        exist; ``dataset_path`` is ``None`` when the description has no
        ``"dataset"`` entry.
    """
    description_file = os.path.join(experiment_dir, DESCRIPTION_FILE_NAME)
    if not os.path.exists(description_file):
        return None, None

    with open(description_file, "rb") as f:
        content = json.load(f)

    output_features = content["config"]["output_features"]
    output_feature_name = output_features[output_feature]["name"]
    # A missing dataset entry should not abort the caller, which already
    # treats a falsy dataset path as "no ground truth available".
    dataset_path = content.get("dataset")
    return output_feature_name, dataset_path
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
58 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
59 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
def check_file(file_path):
    """Return ``file_path`` if it exists on disk, otherwise ``None``.

    :param file_path: path to test; ``None`` is accepted and yields ``None``
        (``os.path.exists(None)`` would raise ``TypeError``)
    :return: ``file_path`` unchanged when the path exists, else ``None``
    """
    if file_path is None:
        return None
    return file_path if os.path.exists(file_path) else None
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
63 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
64 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
def make_visualizations(ludwig_output_directory_name):
    """Run each listed Ludwig visualization for one experiment run.

    The run directory is ``output_directory/ludwig_output_directory_name``
    (``output_directory`` is the module-level value). Input files that do
    not exist are passed to the visualization functions as empty lists or
    ``None``. Plots are written as PNG files to ``viz_output_directory``.

    Each visualization is attempted independently: any exception, including
    a visualization name missing from the registry, is logged and the loop
    continues with the next one.

    :param ludwig_output_directory_name: name of the run directory inside
        ``output_directory``
    :return: None
    """
    ludwig_output_directory = os.path.join(
        output_directory,
        ludwig_output_directory_name,
    )
    visualizations = [
        "confidence_thresholding",
        "confidence_thresholding_data_vs_acc",
        "confidence_thresholding_data_vs_acc_subset",
        "confidence_thresholding_data_vs_acc_subset_per_class",
        "confidence_thresholding_2thresholds_2d",
        "confidence_thresholding_2thresholds_3d",
        "binary_threshold_vs_metric",
        "roc_curves",
        "roc_curves_from_test_statistics",
        "calibration_1_vs_all",
        "calibration_multiclass",
        "confusion_matrix",
        "frequency_vs_f1",
        "learning_curves",
    ]

    # Check existence of required files
    training_statistics = check_file(os.path.join(
        ludwig_output_directory,
        "training_statistics.json",
    ))
    test_statistics = check_file(os.path.join(
        ludwig_output_directory,
        TEST_STATISTICS_FILE_NAME,
    ))
    ground_truth_metadata = check_file(os.path.join(
        ludwig_output_directory,
        "model",
        TRAIN_SET_METADATA_FILE_NAME,
    ))
    probabilities = check_file(os.path.join(
        ludwig_output_directory,
        PREDICTIONS_PARQUET_FILE_NAME,
    ))

    output_feature, dataset_path = get_output_feature_name(
        ludwig_output_directory)
    ground_truth = None
    split_file = None
    if dataset_path:
        ground_truth = check_file(dataset_path)
        split_file = check_file(get_split_path(dataset_path))

    if (not output_feature) and test_statistics:
        # No feature name from the description file: fall back to the first
        # top-level key of the test statistics, or None if the file is empty.
        with open(test_statistics, "rb") as f:
            content = json.load(f)
        output_feature = next(iter(content), None)

    # Build the registry once instead of on every iteration.
    registry = get_visualizations_registry()
    for viz in visualizations:
        try:
            # The lookup is inside the try so that an unregistered name is
            # logged and skipped rather than aborting the remaining plots.
            viz_func = registry[viz]
            viz_func(
                training_statistics=[training_statistics]
                if training_statistics else [],
                test_statistics=[test_statistics] if test_statistics else [],
                probabilities=[probabilities] if probabilities else [],
                top_n_classes=[0],
                output_feature_name=output_feature if output_feature else "",
                ground_truth_split=2,
                top_k=3,
                ground_truth_metadata=ground_truth_metadata,
                ground_truth=ground_truth,
                split_file=split_file,
                output_directory=viz_output_directory,
                normalize=False,
                file_format="png",
            )
        except Exception as e:
            # Best effort: one failing visualization must not stop the rest.
            LOG.info(f"Visualization: {viz}")
            LOG.info(f"Error: {e}")
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
142 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
143 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
def convert_parquet_to_csv(ludwig_output_directory_name):
    """Convert the predictions Parquet file to CSV.

    Reads ``PREDICTIONS_PARQUET_FILE_NAME`` from
    ``output_directory/ludwig_output_directory_name`` (``output_directory``
    is the module-level value) and writes ``predictions_parquet.csv`` into
    the same directory, without the DataFrame index.

    Any failure (for example a missing Parquet file) is logged as an error
    and not re-raised.

    :param ludwig_output_directory_name: name of the run directory inside
        ``output_directory``
    :return: None
    """
    ludwig_output_directory = os.path.join(
        output_directory, ludwig_output_directory_name)
    # Use the shared constant rather than a string literal so this function
    # looks for the same file name as make_visualizations.
    parquet_path = os.path.join(
        ludwig_output_directory, PREDICTIONS_PARQUET_FILE_NAME)
    csv_path = os.path.join(
        ludwig_output_directory, "predictions_parquet.csv")

    try:
        df = pd.read_parquet(parquet_path)
        df.to_csv(csv_path, index=False)
        LOG.info(f"Converted Parquet to CSV: {csv_path}")
    except Exception as e:
        LOG.error(f"Error converting Parquet to CSV: {e}")
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
159 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
160 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
def generate_html_report(title: str, ludwig_output_directory_name):
    """Build a standalone HTML report that embeds the visualization images.

    Every regular file directly inside the module-level
    ``viz_output_directory`` whose name ends in ``.png`` or ``.jpg`` is
    base64-encoded with ``encode_image_to_base64`` and inlined as a data URI,
    in sorted file-name order. The page is wrapped with
    ``get_html_template()`` / ``get_html_closing()`` and written to
    ``<output_directory>/<report_name>_report.html``, where ``report_name``
    is ``title`` lower-cased with spaces replaced by underscores.

    A missing visualization directory is treated like an empty one (a
    warning is logged), and the "Visualizations" heading is only emitted
    when at least one image was embedded.

    Args:
        title: Heading of the report; also determines the report file name.
            Inserted into the page as-is.
        ludwig_output_directory_name: Not used by this function; kept so
            existing callers keep working.

    Raises:
        OSError: If the report file cannot be written.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    if os.path.isdir(viz_output_directory):
        candidates = sorted(os.listdir(viz_output_directory))
    else:
        LOG.warning(
            f"Visualization directory not found: {viz_output_directory}")
        candidates = []

    # Convert visualizations to HTML
    plot_blocks = []
    for plot_file in candidates:
        # The data URI must advertise the real image type of each file.
        if plot_file.endswith(".png"):
            mime_type = "image/png"
        elif plot_file.endswith(".jpg"):
            mime_type = "image/jpeg"
        else:
            continue

        plot_path = os.path.join(viz_output_directory, plot_file)
        if not os.path.isfile(plot_path):
            continue

        encoded_image = encode_image_to_base64(plot_path)
        # File names come from disk; escape them so characters such as
        # '<', '&' or '"' cannot break the heading or the alt attribute.
        heading = escape(os.path.splitext(plot_file)[0])
        alt_text = escape(plot_file, quote=True)
        plot_blocks.append(
            '<div class="plot">'
            f'<h3>{heading}</h3>'
            f'<img src="data:{mime_type};base64,'
            f'{encoded_image}" alt="{alt_text}">'
            '</div>'
        )

    plots_html = ""
    if plot_blocks:
        plots_html = "<h2>Visualizations</h2>" + "".join(plot_blocks)

    # Generate the full HTML content
    html_content = f"""
    {get_html_template()}
        <h1>{title}</h1>
        {plots_html}
    {get_html_closing()}
    """

    # Save the HTML report
    report_name = title.lower().replace(" ", "_")
    report_path = os.path.join(output_directory, f"{report_name}_report.html")
    # Explicit encoding so the output does not depend on the host locale.
    with open(report_path, "w", encoding="utf-8") as report_file:
        report_file.write(html_content)

    LOG.info(f"HTML report generated at: {report_path}")
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
210 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
211 |
f0be10937f5c
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
goeckslab
parents:
diff
changeset
|
if __name__ == "__main__":
    # Hand the command-line arguments straight to Ludwig's experiment CLI;
    # everything below post-processes the outputs of that run.
    cli(sys.argv[1:])

    ludwig_output_directory_name = "experiment_run"

    # Order matters: the plots must exist before the report embeds them.
    for post_processing_step in (make_visualizations, convert_parquet_to_csv):
        post_processing_step(ludwig_output_directory_name)
    generate_html_report("Ludwig Experiment", ludwig_output_directory_name)