comparison html_structure.py @ 12:bcfa2e234a80 draft

planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
author goeckslab
date Fri, 21 Nov 2025 15:58:13 +0000
parents
children
comparison
equal deleted inserted replaced
11:c5150cceab47 12:bcfa2e234a80
1 import base64
2 import json
3 from typing import Any, Dict, Optional
4
5 from constants import METRIC_DISPLAY_NAMES
6 from utils import detect_output_type, extract_metrics_from_json
7
8
def generate_table_row(cells, styles):
    """Render *cells* as a single HTML ``<tr>`` element.

    Every cell becomes a ``<td>`` carrying the same inline ``style``
    attribute, given by *styles*. Cell values are interpolated as-is;
    no HTML escaping is applied here.
    """
    rendered_cells = [f"<td style='{styles}'>{cell}</td>" for cell in cells]
    return f"<tr>{''.join(rendered_cells)}</tr>"
16
17
# Inline style shared by every body cell of the summary table; only the
# horizontal alignment differs between the label and the value column.
_CONFIG_CELL_STYLE = (
    "padding: 6px 12px; border: 1px solid #ccc; text-align: {align}; "
    "white-space: normal; word-break: break-word; overflow-wrap: anywhere;"
)

# Pointer to the Ludwig trainer docs appended to "auto-selected" notes.
_LUDWIG_TRAINER_DOCS_LINK = (
    "See <a href='https://ludwig.ai/latest/configuration/trainer/"
    "#trainer-parameters' target='_blank'>"
    "Ludwig Trainer Parameters</a> for details."
)


def _config_row(label, value) -> str:
    """Return one two-column ``<tr>`` (left-aligned label, centered value)."""
    label_style = _CONFIG_CELL_STYLE.format(align="left")
    value_style = _CONFIG_CELL_STYLE.format(align="center")
    return (
        "<tr>"
        f"<td style='{label_style}'>{label}</td>"
        f"<td style='{value_style}'>{value}</td>"
        "</tr>"
    )


def _auto_selected_note(what, progress_key, training_progress, with_docs_link) -> str:
    """Return the explanatory HTML shown when Ludwig picked a value itself.

    When *training_progress* is truthy, the value stored under
    *progress_key* is shown (or the literal ``auto`` if it is missing or
    falsy); otherwise a generic note is returned.
    """
    if not training_progress:
        return (
            "Auto-selected by Ludwig<br>"
            "<span style='font-size: 0.85em;'>"
            "Automatically tuned based on architecture and dataset.<br>"
            f"{_LUDWIG_TRAINER_DOCS_LINK}"
            "</span>"
        )
    resolved = training_progress.get(progress_key)
    docs_link = _LUDWIG_TRAINER_DOCS_LINK if with_docs_link else ""
    return (
        f"Auto-selected {what} by Ludwig:<br>"
        "<span style='font-size: 0.85em;'>"
        f"{resolved if resolved else 'auto'}</span><br>"
        "<span style='font-size: 0.85em;'>"
        "Based on model architecture and training setup "
        "(e.g., fine-tuning).<br>"
        f"{docs_link}"
        "</span>"
    )


def format_config_table_html(
    config: dict,
    split_info: Optional[str] = None,
    training_progress: Optional[dict] = None,
    output_type: Optional[str] = None,
) -> str:
    """Render the "Model and Training Summary" section as an HTML fragment.

    Args:
        config: Mapping read with ``config.get`` for a fixed list of keys
            (task type, model name, epochs, batch size, ...) plus an
            optional ``"augmentation"`` list of dicts with a ``"type"`` key.
        split_info: Optional text shown in a "Data Split" row.
        training_progress: Optional mapping; its ``"batch_size"``,
            ``"learning_rate"`` and ``"epoch"`` entries are used to resolve
            auto-selected values and to report early stopping.
        output_type: The "Threshold" row is shown only when this equals
            ``"binary"``.

    Returns:
        An HTML string containing a heading, a two-column table and a
        footer paragraph linking to the Ludwig documentation.

    Rows whose value resolves to ``"N/A"`` are omitted, except "Task Type".
    """
    display_keys = [
        "task_type",
        "model_name",
        "epochs",
        "batch_size",
        "fine_tune",
        "use_pretrained",
        "learning_rate",
        "random_seed",
        "early_stop",
        "threshold",
    ]

    rows = []
    for key in display_keys:
        val = config.get(key)

        if key == "threshold":
            if output_type != "binary":
                continue
            val = 0.5 if val is None else val
            val_str = f"{val:.2f}"
            if val == 0.5:
                val_str += " (default)"
        elif key == "task_type":
            val_str = val.title() if isinstance(val, str) else "N/A"
        elif key == "batch_size":
            if val is None or val == "auto":
                # Previously this note was stored in an unused variable, so
                # the table only ever showed the bare word "auto"; an explicit
                # "auto" string also crashed in int(). Render the note.
                val_str = _auto_selected_note(
                    "batch size", "batch_size", training_progress, True
                )
            else:
                val_str = int(val)
        elif key == "learning_rate":
            if val is not None and val != "auto":
                val_str = f"{val:.6f}"
            else:
                val_str = _auto_selected_note(
                    "learning rate", "learning_rate", training_progress, False
                )
        elif key == "epochs":
            if val is None:
                val_str = "N/A"
            elif (
                training_progress
                and "epoch" in training_progress
                and val > training_progress["epoch"]
            ):
                # Fewer epochs were run than configured.
                val_str = (
                    "Because of early stopping: the training "
                    f"stopped at epoch {training_progress['epoch']}"
                )
            else:
                val_str = val
        else:
            val_str = val if val is not None else "N/A"

        if val_str == "N/A" and key != "task_type":
            continue
        rows.append(_config_row(key.replace("_", " ").title(), val_str))

    aug_cfg = config.get("augmentation")
    if aug_cfg:
        aug_val = ", ".join(str(a.get("type", "")) for a in aug_cfg)
        rows.append(_config_row("Augmentation", aug_val))

    if split_info:
        rows.append(_config_row("Data Split", split_info))

    body_rows = "".join(rows)
    html = f"""
<h2 style="text-align: center;">Model and Training Summary</h2>
<div style="display: flex; justify-content: center;">
<table style="border-collapse: collapse; width: 100%; table-layout: fixed;">
<thead><tr>
<th style="padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Parameter</th>
<th style="padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Value</th>
</tr></thead>
<tbody>
{body_rows}
</tbody>
</table>
</div><br>
<p style="text-align: center; font-size: 0.9em;">
Model trained using <a href="https://ludwig.ai/" target="_blank" rel="noopener noreferrer">Ludwig</a>.
<a href="https://ludwig.ai/latest/configuration/" target="_blank" rel="noopener noreferrer">
Ludwig documentation provides detailed information about default model and training parameters
</a>
</p><hr>
"""
    return html
178
179
def get_html_template():
    """
    Return the report preamble: doctype, <head>, and the opening of
    <body> plus the ``.container`` div (left open for the caller to fill).

    The <head> carries:
    - base CSS for the page layout, tables and plots
    - CSS for sortable headers on ``table.performance-summary``
      (three states: none ⇅, asc ↑, desc ↓)
    - the ``.scroll-rows-30`` helper class (max-height with scrollbars)
    - CSS for the tabs, the Help button and the modal dialog
    - a script that wires click-to-sort on ``th.sortable`` headers; it is
      guarded by a ``window`` flag so it initializes only once
    """
    template = """
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <title>Galaxy-Ludwig Report</title>
  <style>
    body {
      font-family: Arial, sans-serif;
      margin: 0;
      padding: 20px;
      background-color: #f4f4f4;
    }
    .container {
      max-width: 1200px;
      margin: auto;
      background: white;
      padding: 20px;
      box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
      overflow-x: auto;
    }
    h1 {
      text-align: center;
      color: #333;
    }
    h2 {
      border-bottom: 2px solid #4CAF50;
      color: #4CAF50;
      padding-bottom: 5px;
      margin-top: 28px;
    }

    /* baseline table setup */
    table {
      border-collapse: collapse;
      margin: 20px 0;
      width: 100%;
      table-layout: fixed;
      background: #fff;
    }
    table, th, td {
      border: 1px solid #ddd;
    }
    th, td {
      padding: 10px;
      text-align: center;
      vertical-align: middle;
      word-break: break-word;
      white-space: normal;
      overflow-wrap: anywhere;
    }
    th {
      background-color: #4CAF50;
      color: white;
    }

    .plot {
      text-align: center;
      margin: 20px 0;
    }
    .plot img {
      max-width: 100%;
      height: auto;
      border: 1px solid #ddd;
    }

    /* -------------------
       sortable columns (3-state: none ⇅, asc ↑, desc ↓)
       ------------------- */
    table.performance-summary th.sortable {
      cursor: pointer;
      position: relative;
      user-select: none;
    }
    /* default icon space */
    table.performance-summary th.sortable::after {
      content: '⇅';
      position: absolute;
      right: 12px;
      top: 50%;
      transform: translateY(-50%);
      font-size: 0.8em;
      color: #eaf5ea; /* light on green */
      text-shadow: 0 0 1px rgba(0,0,0,0.15);
    }
    /* three states override the default */
    table.performance-summary th.sortable.sorted-none::after { content: '⇅'; color: #eaf5ea; }
    table.performance-summary th.sortable.sorted-asc::after { content: '↑'; color: #ffffff; }
    table.performance-summary th.sortable.sorted-desc::after { content: '↓'; color: #ffffff; }

    /* show ~30 rows with a scrollbar (tweak if you want) */
    .scroll-rows-30 {
      max-height: 900px; /* ~30 rows depending on row height */
      overflow-y: auto; /* vertical scrollbar ("sidebar") */
      overflow-x: auto;
    }

    /* Tabs + Help button (used by build_tabbed_html) */
    .tabs {
      display: flex;
      align-items: center;
      border-bottom: 2px solid #ccc;
      margin-bottom: 1rem;
      gap: 6px;
      flex-wrap: wrap;
    }
    .tab {
      padding: 10px 20px;
      cursor: pointer;
      border: 1px solid #ccc;
      border-bottom: none;
      background: #f9f9f9;
      margin-right: 5px;
      border-top-left-radius: 8px;
      border-top-right-radius: 8px;
    }
    .tab.active {
      background: white;
      font-weight: bold;
    }
    .help-btn {
      margin-left: auto;
      padding: 6px 12px;
      font-size: 0.9rem;
      border: 1px solid #4CAF50;
      border-radius: 4px;
      background: #4CAF50;
      color: white;
      cursor: pointer;
    }
    .tab-content {
      display: none;
      padding: 20px;
      border: 1px solid #ccc;
      border-top: none;
      background: #fff;
    }
    .tab-content.active {
      display: block;
    }

    /* Modal (used by get_metrics_help_modal) */
    .modal {
      display: none;
      position: fixed;
      z-index: 9999;
      left: 0; top: 0;
      width: 100%; height: 100%;
      overflow: auto;
      background-color: rgba(0,0,0,0.4);
    }
    .modal-content {
      background-color: #fefefe;
      margin: 8% auto;
      padding: 20px;
      border: 1px solid #888;
      width: 90%;
      max-width: 900px;
      border-radius: 8px;
    }
    .modal .close {
      color: #777;
      float: right;
      font-size: 28px;
      font-weight: bold;
      line-height: 1;
      margin-left: 8px;
    }
    .modal .close:hover,
    .modal .close:focus {
      color: black;
      text-decoration: none;
      cursor: pointer;
    }
    .metrics-guide h3 { margin-top: 20px; }
    .metrics-guide p { margin: 6px 0; }
    .metrics-guide ul { margin: 10px 0; padding-left: 20px; }
  </style>

  <script>
    // Guard to avoid double-initialization if this block is included twice
    (function(){
      if (window.__perfSummarySortInit) return;
      window.__perfSummarySortInit = true;

      function initPerfSummarySorting() {
        // Record original order for "back to original"
        document.querySelectorAll('table.performance-summary tbody').forEach(tbody => {
          Array.from(tbody.rows).forEach((row, i) => { row.dataset.originalOrder = i; });
        });

        const getText = td => (td?.innerText || '').trim();
        const cmp = (idx, asc) => (a, b) => {
          const v1 = getText(a.children[idx]);
          const v2 = getText(b.children[idx]);
          const n1 = parseFloat(v1), n2 = parseFloat(v2);
          if (!isNaN(n1) && !isNaN(n2)) return asc ? n1 - n2 : n2 - n1; // numeric
          return asc ? v1.localeCompare(v2) : v2.localeCompare(v1); // lexical
        };

        document.querySelectorAll('table.performance-summary th.sortable').forEach(th => {
          // initialize to "none"
          th.classList.remove('sorted-asc','sorted-desc');
          th.classList.add('sorted-none');

          th.addEventListener('click', () => {
            const table = th.closest('table');
            const headerRow = th.parentNode;
            const allTh = headerRow.querySelectorAll('th.sortable');
            const tbody = table.querySelector('tbody');

            // Determine current state BEFORE clearing
            const isAsc = th.classList.contains('sorted-asc');
            const isDesc = th.classList.contains('sorted-desc');

            // Reset all headers in this row
            allTh.forEach(x => x.classList.remove('sorted-asc','sorted-desc','sorted-none'));

            // Compute next state
            let next;
            if (!isAsc && !isDesc) {
              next = 'asc';
            } else if (isAsc) {
              next = 'desc';
            } else {
              next = 'none';
            }
            th.classList.add('sorted-' + next);

            // Sort rows according to the chosen state
            const rows = Array.from(tbody.rows);
            if (next === 'none') {
              rows.sort((a, b) => (a.dataset.originalOrder - b.dataset.originalOrder));
            } else {
              const idx = Array.from(headerRow.children).indexOf(th);
              rows.sort(cmp(idx, next === 'asc'));
            }
            rows.forEach(r => tbody.appendChild(r));
          });
        });
      }

      // Run after DOM is ready
      if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', initPerfSummarySorting);
      } else {
        initPerfSummarySorting();
      }
    })();
  </script>
</head>
<body>
  <div class="container">
"""
    return template
443
444
def get_html_closing():
    """Return the closing tags for the ``.container`` div, <body> and <html>."""
    closing_tags = ("</div>", "</body>", "</html>")
    # Leading and trailing empty items give the newline before the first tag
    # and after the last one.
    return "\n".join(("", *closing_tags, ""))
452
453
def encode_image_to_base64(image_path: str) -> str:
    """Read the file at *image_path* in binary mode and return its base64 text."""
    with open(image_path, "rb") as img_file:
        raw_bytes = img_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
458
459
def json_to_nested_html_table(json_data, depth: int = 0) -> str:
    """
    Render a JSON-like object as (possibly nested) HTML tables.

    A dict becomes a table with one row per key; a list becomes a table
    with one row per element. When none of the values is itself a dict or
    list, plain ``<th>``/``<td>`` rows are emitted (list rows are labelled
    ``Index i``). Otherwise each value is rendered recursively and the
    header cell is left-aligned and padded by ``depth * 20`` pixels (list
    rows are labelled ``[i]``). Any other object is returned as its
    formatted string. Values are not HTML-escaped.
    """
    if isinstance(json_data, dict):
        entries = list(json_data.items())
        flat_label = nested_label = "{}"
    elif isinstance(json_data, list):
        entries = list(enumerate(json_data))
        flat_label, nested_label = "Index {}", "[{}]"
    else:
        # Primitive
        return f"{json_data}"

    has_nested_value = any(isinstance(item, (dict, list)) for _, item in entries)

    if has_nested_value:
        indent_px = depth * 20
        body = "".join(
            f"<tr><th style='text-align:left;padding-left:{indent_px}px;'>"
            f"{nested_label.format(label)}</th>"
            f"<td>{json_to_nested_html_table(item, depth + 1)}</td></tr>"
            for label, item in entries
        )
    else:
        body = "".join(
            f"<tr><th>{flat_label.format(label)}</th><td>{item}</td></tr>"
            for label, item in entries
        )
    return f"<table>{body}</table>"
508
509
def json_to_html_table(json_data) -> str:
    """
    Render JSON as an HTML table via ``json_to_nested_html_table``.

    A ``str`` argument is first parsed with ``json.loads``; any other
    object is passed through unchanged.
    """
    parsed = json.loads(json_data) if isinstance(json_data, str) else json_data
    return json_to_nested_html_table(parsed)
517
518
def build_tabbed_html(metrics_html: str, train_val_html: str, test_html: str) -> str:
    """
    Assemble the three-tab report body.

    Tabs, in order:
    - Config and Results Summary (``metrics_html``, initially active)
    - Train/Validation Results (``train_val_html``)
    - Test Results (``test_html``)

    The tab bar also holds a "Help" button with id ``openMetricsHelp``.
    A ``showTab(id)`` script is appended to switch the active tab and panel.
    """
    tabs_and_panels = f"""
<div class="tabs">
  <div class="tab active" onclick="showTab('metrics')">Config and Results Summary</div>
  <div class="tab" onclick="showTab('trainval')">Train/Validation Results</div>
  <div class="tab" onclick="showTab('test')">Test Results</div>
  <button id="openMetricsHelp" class="help-btn" title="Open metrics help">Help</button>
</div>

<div id="metrics" class="tab-content active">
  {metrics_html}
</div>
<div id="trainval" class="tab-content">
  {train_val_html}
</div>
<div id="test" class="tab-content">
  {test_html}
</div>
"""
    # Kept out of the f-string so the JavaScript braces need no doubling.
    tab_switch_script = """
<script>
  function showTab(id) {
    document.querySelectorAll('.tab-content').forEach(el => el.classList.remove('active'));
    document.querySelectorAll('.tab').forEach(el => el.classList.remove('active'));
    document.getElementById(id).classList.add('active');
    // find tab with matching onclick target
    document.querySelectorAll('.tab').forEach(t => {
      if (t.getAttribute('onclick') && t.getAttribute('onclick').includes(id)) {
        t.classList.add('active');
      }
    });
  }
</script>
"""
    return tabs_and_panels + tab_switch_script
559
560
def get_metrics_help_modal() -> str:
    """
    Return the metrics help dialog markup followed by its wiring script.

    The markup is a ``div#metricsHelpModal`` holding a ten-section guide to
    the evaluation metrics. The script runs on ``DOMContentLoaded``: it
    opens the dialog when the element with id ``openMetricsHelp`` is
    clicked, and hides it when the ``.close`` element or the modal
    backdrop itself is clicked.
    """
    dialog_open = (
        '<div id="metricsHelpModal" class="modal">'
        ' <div class="modal-content">'
        ' <span class="close">×</span>'
        " <h2>Model Evaluation Metrics — Help Guide</h2>"
        ' <div class="metrics-guide">'
    )

    general_and_regression = (
        ' <h3>1) General Metrics (Regression and Classification)</h3>'
        ' <p><strong>Loss (Regression & Classification):</strong> '
        'Measures the difference between predicted and actual values, '
        'optimized during training. Lower is better. '
        'For regression, this is often Mean Squared Error (MSE) or '
        'Mean Absolute Error (MAE). For classification, it\'s typically '
        'cross-entropy or log loss.</p>'
        ' <h3>2) Regression Metrics</h3>'
        ' <p><strong>Mean Absolute Error (MAE):</strong> '
        'Average of absolute differences between predicted and actual values, '
        'in the same units as the target. Use for interpretable error measurement '
        'when all errors are equally important. Less sensitive to outliers than MSE.</p>'
        ' <p><strong>Mean Squared Error (MSE):</strong> '
        'Average of squared differences between predicted and actual values. '
        'Penalizes larger errors more heavily, useful when large deviations are critical. '
        'Often used as the loss function in regression.</p>'
        ' <p><strong>Root Mean Squared Error (RMSE):</strong> '
        'Square root of MSE, in the same units as the target. '
        'Balances interpretability and sensitivity to large errors. '
        'Widely used for regression evaluation.</p>'
        ' <p><strong>Mean Absolute Percentage Error (MAPE):</strong> '
        'Average absolute error as a percentage of actual values. '
        'Scale-independent, ideal for comparing relative errors across datasets. '
        'Avoid when actual values are near zero.</p>'
        ' <p><strong>Root Mean Squared Percentage Error (RMSPE):</strong> '
        'Square root of mean squared percentage error. Scale-independent, '
        'penalizes larger relative errors more than MAPE. Use for forecasting '
        'or when relative accuracy matters.</p>'
        ' <p><strong>R² Score:</strong> Proportion of variance in the target '
        'explained by the model. Ranges from negative infinity to 1 (perfect prediction). '
        'Use to assess model fit; negative values indicate poor performance '
        'compared to predicting the mean.</p>'
    )

    classification = (
        ' <h3>3) Classification Metrics</h3>'
        ' <p><strong>Accuracy:</strong> Proportion of correct predictions '
        'among all predictions. Simple but misleading for imbalanced datasets, '
        'where high accuracy may hide poor performance on minority classes.</p>'
        ' <p><strong>Micro Accuracy:</strong> Sums true positives and true negatives '
        'across all classes before computing accuracy. Suitable for multiclass or '
        'multilabel problems with imbalanced data.</p>'
        ' <p><strong>Token Accuracy:</strong> Measures how often predicted tokens '
        '(e.g., in sequences) match true tokens. Common in NLP tasks like text generation '
        'or token classification.</p>'
        ' <p><strong>Precision:</strong> Proportion of positive predictions that are '
        'correct (TP / (TP + FP)). Use when false positives are costly, e.g., spam detection.</p>'
        ' <p><strong>Recall (Sensitivity):</strong> Proportion of actual positives '
        'correctly predicted (TP / (TP + FN)). Use when missing positives is risky, '
        'e.g., disease detection.</p>'
        ' <p><strong>Specificity:</strong> True negative rate (TN / (TN + FP)). '
        'Measures ability to identify negatives. Useful in medical testing to avoid '
        'false alarms.</p>'
        ' <h3>4) Classification: Macro, Micro, and Weighted Averages</h3>'
        ' <p><strong>Macro Precision / Recall / F1:</strong> Averages the metric '
        'across all classes, treating each equally. Best for balanced datasets where '
        'all classes are equally important.</p>'
        ' <p><strong>Micro Precision / Recall / F1:</strong> Aggregates true positives, '
        'false positives, and false negatives across all classes before computing. '
        'Ideal for imbalanced or multilabel classification.</p>'
        ' <p><strong>Weighted Precision / Recall / F1:</strong> Averages metrics '
        'across classes, weighted by the number of true instances per class. Balances '
        'class importance based on frequency.</p>'
        ' <h3>5) Classification: Average Precision (PR-AUC Variants)</h3>'
        ' <p><strong>Average Precision Macro:</strong> Precision-Recall AUC averaged '
        'equally across classes. Use for balanced multiclass problems.</p>'
        ' <p><strong>Average Precision Micro:</strong> Global Precision-Recall AUC '
        'using all instances. Best for imbalanced or multilabel classification.</p>'
        ' <p><strong>Average Precision Samples:</strong> Precision-Recall AUC averaged '
        'across individual samples. Ideal for multilabel tasks where samples have multiple '
        'labels.</p>'
        ' <h3>6) Classification: ROC-AUC Variants</h3>'
        ' <p><strong>ROC-AUC:</strong> Measures ability to distinguish between classes. '
        'AUC = 1 is perfect; 0.5 is random guessing. Use for binary classification.</p>'
        ' <p><strong>Macro ROC-AUC:</strong> Averages AUC across all classes equally. '
        'Suitable for balanced multiclass problems.</p>'
        ' <p><strong>Micro ROC-AUC:</strong> Computes AUC from aggregated predictions '
        'across all classes. Useful for imbalanced or multilabel settings.</p>'
        ' <h3>7) Classification: Confusion Matrix Stats (Per Class)</h3>'
        ' <p><strong>True Positives / Negatives (TP / TN):</strong> Correct predictions '
        'for positives and negatives, respectively.</p>'
        ' <p><strong>False Positives / Negatives (FP / FN):</strong> Incorrect predictions '
        '— false alarms and missed detections.</p>'
        ' <h3>8) Classification: Ranking Metrics</h3>'
        ' <p><strong>Hits at K:</strong> Measures whether the true label is among the '
        'top-K predictions. Common in recommendation systems and retrieval tasks.</p>'
        ' <h3>9) Other Metrics (Classification)</h3>'
        ' <p><strong>Cohen\'s Kappa:</strong> Measures agreement between predicted and '
        'actual labels, adjusted for chance. Useful for multiclass classification with '
        'imbalanced data.</p>'
        ' <p><strong>Matthews Correlation Coefficient (MCC):</strong> Balanced measure '
        'using TP, TN, FP, and FN. Effective for imbalanced datasets.</p>'
    )

    recommendations = (
        ' <h3>10) Metric Recommendations</h3>'
        ' <ul>'
        ' <li><strong>Regression:</strong> Use <strong>RMSE</strong> or '
        '<strong>MAE</strong> for general evaluation, <strong>MAPE</strong> for relative '
        'errors, and <strong>R²</strong> to assess model fit. Use <strong>MSE</strong> or '
        '<strong>RMSPE</strong> when large errors are critical.</li>'
        ' <li><strong>Classification (Balanced Data):</strong> Use <strong>Accuracy</strong> '
        'and <strong>F1</strong> for overall performance.</li>'
        ' <li><strong>Classification (Imbalanced Data):</strong> Use <strong>Precision</strong>, '
        '<strong>Recall</strong>, and <strong>ROC-AUC</strong> to focus on minority class '
        'performance.</li>'
        ' <li><strong>Multilabel or Imbalanced Classification:</strong> Use '
        '<strong>Micro Precision/Recall/F1</strong> or <strong>Micro ROC-AUC</strong>.</li>'
        ' <li><strong>Balanced Multiclass:</strong> Use <strong>Macro Precision/Recall/F1</strong> '
        'or <strong>Macro ROC-AUC</strong>.</li>'
        ' <li><strong>Class Frequency Matters:</strong> Use <strong>Weighted Precision/Recall/F1</strong> '
        'to account for class imbalance.</li>'
        ' <li><strong>Recommendation/Ranking:</strong> Use <strong>Hits at K</strong> for retrieval tasks.</li>'
        ' <li><strong>Detailed Analysis:</strong> Use <strong>Confusion Matrix stats</strong> '
        'for class-wise performance in classification.</li>'
        ' </ul>'
    )

    dialog_close = (
        ' </div>'
        ' </div>'
        '</div>'
    )

    click_wiring = (
        "<script>"
        "document.addEventListener('DOMContentLoaded', function() {"
        " var modal = document.getElementById('metricsHelpModal');"
        " var openBtn = document.getElementById('openMetricsHelp');"
        " var closeBtn = modal ? modal.querySelector('.close') : null;"
        " if (openBtn && modal) {"
        " openBtn.addEventListener('click', function(){ modal.style.display = 'block'; });"
        " }"
        " if (closeBtn && modal) {"
        " closeBtn.addEventListener('click', function(){ modal.style.display = 'none'; });"
        " }"
        " window.addEventListener('click', function(ev){"
        " if (ev.target === modal) { modal.style.display = 'none'; }"
        " });"
        "});"
        "</script>"
    )

    return "".join(
        (
            dialog_open,
            general_and_regression,
            classification,
            recommendations,
            dialog_close,
            click_wiring,
        )
    )
706
707 # -----------------------------------------
708 # MODEL PERFORMANCE (Train/Val/Test) TABLE
709 # -----------------------------------------
710
711
def format_stats_table_html(train_stats: dict, test_stats: dict, output_type: str) -> str:
    """Build the sortable "Model Performance Summary" table (train/validation/test).

    Metrics come from ``extract_metrics_from_json``. A metric is listed
    only when it is present in the training, validation and test groups
    and none of its three values is ``None``. Values are shown with four
    decimals. If nothing qualifies, a one-cell placeholder table is returned.
    """
    metrics = extract_metrics_from_json(train_stats, test_stats, output_type)

    table_rows = []
    for name in sorted(metrics["training"].keys()):
        if name not in metrics["validation"] or name not in metrics["test"]:
            continue
        train_value = metrics["training"].get(name)
        validation_value = metrics["validation"].get(name)
        test_value = metrics["test"].get(name)
        if train_value is None or validation_value is None or test_value is None:
            continue
        label = METRIC_DISPLAY_NAMES.get(name, name.replace("_", " ").title())
        table_rows.append(
            [label, f"{train_value:.4f}", f"{validation_value:.4f}", f"{test_value:.4f}"]
        )

    if not table_rows:
        return "<table><tr><td>No metric values found.</td></tr></table>"

    # Only the first header cell is left-aligned.
    columns = (
        ("Metric", "left"),
        ("Train", "center"),
        ("Validation", "center"),
        ("Test", "center"),
    )
    header_cells = "".join(
        f"<th class='sortable' style='padding: 10px; border: 1px solid #ccc; "
        f"text-align: {align}; white-space: nowrap;'>{title}</th>"
        for title, align in columns
    )
    cell_style = "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;"
    body = "".join(generate_table_row(cells, cell_style) for cells in table_rows)

    return (
        "<h2 style='text-align: center;'>Model Performance Summary</h2>"
        "<div style='display: flex; justify-content: center;'>"
        "<table class='performance-summary' style='border-collapse: collapse;'>"
        f"<thead><tr>{header_cells}</tr></thead><tbody>"
        f"{body}"
        "</tbody></table></div><br>"
    )
752
753 # -------------------------------------------
754 # TRAIN/VALIDATION PERFORMANCE SUMMARY TABLE
755 # -------------------------------------------
756
757
def format_train_val_stats_table_html(train_stats: dict, test_stats: dict) -> str:
    """Build the sortable "Train/Validation Performance Summary" table.

    The output type passed to ``extract_metrics_from_json`` is obtained
    from ``detect_output_type(test_stats)``. A metric is listed only when
    it is present in both the training and validation groups and neither
    value is ``None``. Values are shown with four decimals. If nothing
    qualifies, a one-cell placeholder table is returned.
    """
    output_type = detect_output_type(test_stats)
    metrics = extract_metrics_from_json(train_stats, test_stats, output_type)

    table_rows = []
    for name in sorted(metrics["training"].keys()):
        if name not in metrics["validation"]:
            continue
        train_value = metrics["training"].get(name)
        validation_value = metrics["validation"].get(name)
        if train_value is None or validation_value is None:
            continue
        label = METRIC_DISPLAY_NAMES.get(name, name.replace("_", " ").title())
        table_rows.append([label, f"{train_value:.4f}", f"{validation_value:.4f}"])

    if not table_rows:
        return "<table><tr><td>No metric values found for Train/Validation.</td></tr></table>"

    # Only the first header cell is left-aligned.
    columns = (("Metric", "left"), ("Train", "center"), ("Validation", "center"))
    header_cells = "".join(
        f"<th class='sortable' style='padding: 10px; border: 1px solid #ccc; "
        f"text-align: {align}; white-space: nowrap;'>{title}</th>"
        for title, align in columns
    )
    cell_style = "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;"
    body = "".join(generate_table_row(cells, cell_style) for cells in table_rows)

    return (
        "<h2 style='text-align: center;'>Train/Validation Performance Summary</h2>"
        "<div style='display: flex; justify-content: center;'>"
        "<table class='performance-summary' style='border-collapse: collapse;'>"
        f"<thead><tr>{header_cells}</tr></thead><tbody>"
        f"{body}"
        "</tbody></table></div><br>"
    )
793
794 # -----------------------------------------
795 # TEST-ONLY PERFORMANCE SUMMARY TABLE
796 # -----------------------------------------
797
798
def format_test_merged_stats_table_html(
    test_metrics: Dict[str, Any], output_type: str
) -> str:
    """Build the sortable "Test Performance Summary" table.

    Keys of *test_metrics* are listed in sorted order; entries whose value
    is ``None`` are skipped and the rest are shown with four decimals.
    *output_type* is accepted but not used by this function. If no value
    remains, a one-cell placeholder table is returned.
    """
    table_rows = [
        [
            METRIC_DISPLAY_NAMES.get(name, name.replace("_", " ").title()),
            f"{test_metrics[name]:.4f}",
        ]
        for name in sorted(test_metrics.keys())
        if test_metrics[name] is not None
    ]

    if not table_rows:
        return "<table><tr><td>No test metric values found.</td></tr></table>"

    # Only the first header cell is left-aligned.
    columns = (("Metric", "left"), ("Test", "center"))
    header_cells = "".join(
        f"<th class='sortable' style='padding: 10px; border: 1px solid #ccc; "
        f"text-align: {align}; white-space: nowrap;'>{title}</th>"
        for title, align in columns
    )
    cell_style = "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;"
    body = "".join(generate_table_row(cells, cell_style) for cells in table_rows)

    return (
        "<h2 style='text-align: center;'>Test Performance Summary</h2>"
        "<div style='display: flex; justify-content: center;'>"
        "<table class='performance-summary' style='border-collapse: collapse;'>"
        f"<thead><tr>{header_cells}</tr></thead><tbody>"
        f"{body}"
        "</tbody></table></div><br>"
    )