Mercurial > repos > goeckslab > image_learner
Comparison: utils.py @ 11:c5150cceab47 (draft, default, tip)
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
author | goeckslab |
---|---|
date | Sat, 18 Oct 2025 03:17:09 +0000 |
parents | b0d893d04d4c |
children |
Comparison legend (row status):
equal
deleted
inserted
replaced
10:b0d893d04d4c | 11:c5150cceab47 |
---|---|
102 table.performance-summary th.sortable.sorted-desc::after { content: '↓'; color: #ffffff; } | 102 table.performance-summary th.sortable.sorted-desc::after { content: '↓'; color: #ffffff; } |
103 | 103 |
104 /* show ~30 rows with a scrollbar (tweak if you want) */ | 104 /* show ~30 rows with a scrollbar (tweak if you want) */ |
105 .scroll-rows-30 { | 105 .scroll-rows-30 { |
106 max-height: 900px; /* ~30 rows depending on row height */ | 106 max-height: 900px; /* ~30 rows depending on row height */ |
107 overflow-y: auto; /* vertical scrollbar (“sidebar”) */ | 107 overflow-y: auto; /* vertical scrollbar ("sidebar") */ |
108 overflow-x: auto; | 108 overflow-x: auto; |
109 } | 109 } |
110 | 110 |
111 /* Tabs + Help button (used by build_tabbed_html) */ | 111 /* Tabs + Help button (used by build_tabbed_html) */ |
112 .tabs { | 112 .tabs { |
210 if (!isNaN(n1) && !isNaN(n2)) return asc ? n1 - n2 : n2 - n1; // numeric | 210 if (!isNaN(n1) && !isNaN(n2)) return asc ? n1 - n2 : n2 - n1; // numeric |
211 return asc ? v1.localeCompare(v2) : v2.localeCompare(v1); // lexical | 211 return asc ? v1.localeCompare(v2) : v2.localeCompare(v1); // lexical |
212 }; | 212 }; |
213 | 213 |
214 document.querySelectorAll('table.performance-summary th.sortable').forEach(th => { | 214 document.querySelectorAll('table.performance-summary th.sortable').forEach(th => { |
215 // initialize to “none” | 215 // initialize to "none" |
216 th.classList.remove('sorted-asc','sorted-desc'); | 216 th.classList.remove('sorted-asc','sorted-desc'); |
217 th.classList.add('sorted-none'); | 217 th.classList.add('sorted-none'); |
218 | 218 |
219 th.addEventListener('click', () => { | 219 th.addEventListener('click', () => { |
220 const table = th.closest('table'); | 220 const table = th.closest('table'); |
392 '<div id="metricsHelpModal" class="modal">' | 392 '<div id="metricsHelpModal" class="modal">' |
393 ' <div class="modal-content">' | 393 ' <div class="modal-content">' |
394 ' <span class="close">×</span>' | 394 ' <span class="close">×</span>' |
395 " <h2>Model Evaluation Metrics — Help Guide</h2>" | 395 " <h2>Model Evaluation Metrics — Help Guide</h2>" |
396 ' <div class="metrics-guide">' | 396 ' <div class="metrics-guide">' |
397 " <h3>1) General Metrics (Regression and Classification)</h3>" | 397 ' <h3>1) General Metrics (Regression and Classification)</h3>' |
398 " <p><strong>Loss (Regression & Classification):</strong> " | 398 ' <p><strong>Loss (Regression & Classification):</strong> ' |
399 "Measures the difference between predicted and actual values, " | 399 'Measures the difference between predicted and actual values, ' |
400 "optimized during training. Lower is better. " | 400 'optimized during training. Lower is better. ' |
401 "For regression, this is often Mean Squared Error (MSE) or " | 401 'For regression, this is often Mean Squared Error (MSE) or ' |
402 "Mean Absolute Error (MAE). For classification, it’s typically " | 402 'Mean Absolute Error (MAE). For classification, it\'s typically ' |
403 "cross-entropy or log loss.</p>" | 403 'cross-entropy or log loss.</p>' |
404 " <h3>2) Regression Metrics</h3>" | 404 ' <h3>2) Regression Metrics</h3>' |
405 " <p><strong>Mean Absolute Error (MAE):</strong> " | 405 ' <p><strong>Mean Absolute Error (MAE):</strong> ' |
406 "Average of absolute differences between predicted and actual values, " | 406 'Average of absolute differences between predicted and actual values, ' |
407 "in the same units as the target. Use for interpretable error measurement " | 407 'in the same units as the target. Use for interpretable error measurement ' |
408 "when all errors are equally important. Less sensitive to outliers than MSE.</p>" | 408 'when all errors are equally important. Less sensitive to outliers than MSE.</p>' |
409 " <p><strong>Mean Squared Error (MSE):</strong> " | 409 ' <p><strong>Mean Squared Error (MSE):</strong> ' |
410 "Average of squared differences between predicted and actual values. " | 410 'Average of squared differences between predicted and actual values. ' |
411 "Penalizes larger errors more heavily, useful when large deviations are critical. " | 411 'Penalizes larger errors more heavily, useful when large deviations are critical. ' |
412 "Often used as the loss function in regression.</p>" | 412 'Often used as the loss function in regression.</p>' |
413 " <p><strong>Root Mean Squared Error (RMSE):</strong> " | 413 ' <p><strong>Root Mean Squared Error (RMSE):</strong> ' |
414 "Square root of MSE, in the same units as the target. " | 414 'Square root of MSE, in the same units as the target. ' |
415 "Balances interpretability and sensitivity to large errors. " | 415 'Balances interpretability and sensitivity to large errors. ' |
416 "Widely used for regression evaluation.</p>" | 416 'Widely used for regression evaluation.</p>' |
417 " <p><strong>Mean Absolute Percentage Error (MAPE):</strong> " | 417 ' <p><strong>Mean Absolute Percentage Error (MAPE):</strong> ' |
418 "Average absolute error as a percentage of actual values. " | 418 'Average absolute error as a percentage of actual values. ' |
419 "Scale-independent, ideal for comparing relative errors across datasets. " | 419 'Scale-independent, ideal for comparing relative errors across datasets. ' |
420 "Avoid when actual values are near zero.</p>" | 420 'Avoid when actual values are near zero.</p>' |
421 " <p><strong>Root Mean Squared Percentage Error (RMSPE):</strong> " | 421 ' <p><strong>Root Mean Squared Percentage Error (RMSPE):</strong> ' |
422 "Square root of mean squared percentage error. Scale-independent, " | 422 'Square root of mean squared percentage error. Scale-independent, ' |
423 "penalizes larger relative errors more than MAPE. Use for forecasting " | 423 'penalizes larger relative errors more than MAPE. Use for forecasting ' |
424 "or when relative accuracy matters.</p>" | 424 'or when relative accuracy matters.</p>' |
425 " <p><strong>R² Score:</strong> Proportion of variance in the target " | 425 ' <p><strong>R² Score:</strong> Proportion of variance in the target ' |
426 "explained by the model. Ranges from negative infinity to 1 (perfect prediction). " | 426 'explained by the model. Ranges from negative infinity to 1 (perfect prediction). ' |
427 "Use to assess model fit; negative values indicate poor performance " | 427 'Use to assess model fit; negative values indicate poor performance ' |
428 "compared to predicting the mean.</p>" | 428 'compared to predicting the mean.</p>' |
429 " <h3>3) Classification Metrics</h3>" | 429 ' <h3>3) Classification Metrics</h3>' |
430 " <p><strong>Accuracy:</strong> Proportion of correct predictions " | 430 ' <p><strong>Accuracy:</strong> Proportion of correct predictions ' |
431 "among all predictions. Simple but misleading for imbalanced datasets, " | 431 'among all predictions. Simple but misleading for imbalanced datasets, ' |
432 "where high accuracy may hide poor performance on minority classes.</p>" | 432 'where high accuracy may hide poor performance on minority classes.</p>' |
433 " <p><strong>Micro Accuracy:</strong> Sums true positives and true negatives " | 433 ' <p><strong>Micro Accuracy:</strong> Sums true positives and true negatives ' |
434 "across all classes before computing accuracy. Suitable for multiclass or " | 434 'across all classes before computing accuracy. Suitable for multiclass or ' |
435 "multilabel problems with imbalanced data.</p>" | 435 'multilabel problems with imbalanced data.</p>' |
436 " <p><strong>Token Accuracy:</strong> Measures how often predicted tokens " | 436 ' <p><strong>Token Accuracy:</strong> Measures how often predicted tokens ' |
437 "(e.g., in sequences) match true tokens. Common in NLP tasks like text generation " | 437 '(e.g., in sequences) match true tokens. Common in NLP tasks like text generation ' |
438 "or token classification.</p>" | 438 'or token classification.</p>' |
439 " <p><strong>Precision:</strong> Proportion of positive predictions that are " | 439 ' <p><strong>Precision:</strong> Proportion of positive predictions that are ' |
440 "correct (TP / (TP + FP)). Use when false positives are costly, e.g., spam detection.</p>" | 440 'correct (TP / (TP + FP)). Use when false positives are costly, e.g., spam detection.</p>' |
441 " <p><strong>Recall (Sensitivity):</strong> Proportion of actual positives " | 441 ' <p><strong>Recall (Sensitivity):</strong> Proportion of actual positives ' |
442 "correctly predicted (TP / (TP + FN)). Use when missing positives is risky, " | 442 'correctly predicted (TP / (TP + FN)). Use when missing positives is risky, ' |
443 "e.g., disease detection.</p>" | 443 'e.g., disease detection.</p>' |
444 " <p><strong>Specificity:</strong> True negative rate (TN / (TN + FP)). " | 444 ' <p><strong>Specificity:</strong> True negative rate (TN / (TN + FP)). ' |
445 "Measures ability to identify negatives. Useful in medical testing to avoid " | 445 'Measures ability to identify negatives. Useful in medical testing to avoid ' |
446 "false alarms.</p>" | 446 'false alarms.</p>' |
447 " <h3>4) Classification: Macro, Micro, and Weighted Averages</h3>" | 447 ' <h3>4) Classification: Macro, Micro, and Weighted Averages</h3>' |
448 " <p><strong>Macro Precision / Recall / F1:</strong> Averages the metric " | 448 ' <p><strong>Macro Precision / Recall / F1:</strong> Averages the metric ' |
449 "across all classes, treating each equally. Best for balanced datasets where " | 449 'across all classes, treating each equally. Best for balanced datasets where ' |
450 "all classes are equally important.</p>" | 450 'all classes are equally important.</p>' |
451 " <p><strong>Micro Precision / Recall / F1:</strong> Aggregates true positives, " | 451 ' <p><strong>Micro Precision / Recall / F1:</strong> Aggregates true positives, ' |
452 "false positives, and false negatives across all classes before computing. " | 452 'false positives, and false negatives across all classes before computing. ' |
453 "Ideal for imbalanced or multilabel classification.</p>" | 453 'Ideal for imbalanced or multilabel classification.</p>' |
454 " <p><strong>Weighted Precision / Recall / F1:</strong> Averages metrics " | 454 ' <p><strong>Weighted Precision / Recall / F1:</strong> Averages metrics ' |
455 "across classes, weighted by the number of true instances per class. Balances " | 455 'across classes, weighted by the number of true instances per class. Balances ' |
456 "class importance based on frequency.</p>" | 456 'class importance based on frequency.</p>' |
457 " <h3>5) Classification: Average Precision (PR-AUC Variants)</h3>" | 457 ' <h3>5) Classification: Average Precision (PR-AUC Variants)</h3>' |
458 " <p><strong>Average Precision Macro:</strong> Precision-Recall AUC averaged " | 458 ' <p><strong>Average Precision Macro:</strong> Precision-Recall AUC averaged ' |
459 "equally across classes. Use for balanced multiclass problems.</p>" | 459 'equally across classes. Use for balanced multiclass problems.</p>' |
460 " <p><strong>Average Precision Micro:</strong> Global Precision-Recall AUC " | 460 ' <p><strong>Average Precision Micro:</strong> Global Precision-Recall AUC ' |
461 "using all instances. Best for imbalanced or multilabel classification.</p>" | 461 'using all instances. Best for imbalanced or multilabel classification.</p>' |
462 " <p><strong>Average Precision Samples:</strong> Precision-Recall AUC averaged " | 462 ' <p><strong>Average Precision Samples:</strong> Precision-Recall AUC averaged ' |
463 "across individual samples. Ideal for multilabel tasks where samples have multiple " | 463 'across individual samples. Ideal for multilabel tasks where samples have multiple ' |
464 "labels.</p>" | 464 'labels.</p>' |
465 " <h3>6) Classification: ROC-AUC Variants</h3>" | 465 ' <h3>6) Classification: ROC-AUC Variants</h3>' |
466 " <p><strong>ROC-AUC:</strong> Measures ability to distinguish between classes. " | 466 ' <p><strong>ROC-AUC:</strong> Measures ability to distinguish between classes. ' |
467 "AUC = 1 is perfect; 0.5 is random guessing. Use for binary classification.</p>" | 467 'AUC = 1 is perfect; 0.5 is random guessing. Use for binary classification.</p>' |
468 " <p><strong>Macro ROC-AUC:</strong> Averages AUC across all classes equally. " | 468 ' <p><strong>Macro ROC-AUC:</strong> Averages AUC across all classes equally. ' |
469 "Suitable for balanced multiclass problems.</p>" | 469 'Suitable for balanced multiclass problems.</p>' |
470 " <p><strong>Micro ROC-AUC:</strong> Computes AUC from aggregated predictions " | 470 ' <p><strong>Micro ROC-AUC:</strong> Computes AUC from aggregated predictions ' |
471 "across all classes. Useful for imbalanced or multilabel settings.</p>" | 471 'across all classes. Useful for imbalanced or multilabel settings.</p>' |
472 " <h3>7) Classification: Confusion Matrix Stats (Per Class)</h3>" | 472 ' <h3>7) Classification: Confusion Matrix Stats (Per Class)</h3>' |
473 " <p><strong>True Positives / Negatives (TP / TN):</strong> Correct predictions " | 473 ' <p><strong>True Positives / Negatives (TP / TN):</strong> Correct predictions ' |
474 "for positives and negatives, respectively.</p>" | 474 'for positives and negatives, respectively.</p>' |
475 " <p><strong>False Positives / Negatives (FP / FN):</strong> Incorrect predictions " | 475 ' <p><strong>False Positives / Negatives (FP / FN):</strong> Incorrect predictions ' |
476 "— false alarms and missed detections.</p>" | 476 '— false alarms and missed detections.</p>' |
477 " <h3>8) Classification: Ranking Metrics</h3>" | 477 ' <h3>8) Classification: Ranking Metrics</h3>' |
478 " <p><strong>Hits at K:</strong> Measures whether the true label is among the " | 478 ' <p><strong>Hits at K:</strong> Measures whether the true label is among the ' |
479 "top-K predictions. Common in recommendation systems and retrieval tasks.</p>" | 479 'top-K predictions. Common in recommendation systems and retrieval tasks.</p>' |
480 " <h3>9) Other Metrics (Classification)</h3>" | 480 ' <h3>9) Other Metrics (Classification)</h3>' |
481 " <p><strong>Cohen's Kappa:</strong> Measures agreement between predicted and " | 481 ' <p><strong>Cohen\'s Kappa:</strong> Measures agreement between predicted and ' |
482 "actual labels, adjusted for chance. Useful for multiclass classification with " | 482 'actual labels, adjusted for chance. Useful for multiclass classification with ' |
483 "imbalanced data.</p>" | 483 'imbalanced data.</p>' |
484 " <p><strong>Matthews Correlation Coefficient (MCC):</strong> Balanced measure " | 484 ' <p><strong>Matthews Correlation Coefficient (MCC):</strong> Balanced measure ' |
485 "using TP, TN, FP, and FN. Effective for imbalanced datasets.</p>" | 485 'using TP, TN, FP, and FN. Effective for imbalanced datasets.</p>' |
486 " <h3>10) Metric Recommendations</h3>" | 486 ' <h3>10) Metric Recommendations</h3>' |
487 " <ul>" | 487 ' <ul>' |
488 " <li><strong>Regression:</strong> Use <strong>RMSE</strong> or " | 488 ' <li><strong>Regression:</strong> Use <strong>RMSE</strong> or ' |
489 "<strong>MAE</strong> for general evaluation, <strong>MAPE</strong> for relative " | 489 '<strong>MAE</strong> for general evaluation, <strong>MAPE</strong> for relative ' |
490 "errors, and <strong>R²</strong> to assess model fit. Use <strong>MSE</strong> or " | 490 'errors, and <strong>R²</strong> to assess model fit. Use <strong>MSE</strong> or ' |
491 "<strong>RMSPE</strong> when large errors are critical.</li>" | 491 '<strong>RMSPE</strong> when large errors are critical.</li>' |
492 " <li><strong>Classification (Balanced Data):</strong> Use <strong>Accuracy</strong> " | 492 ' <li><strong>Classification (Balanced Data):</strong> Use <strong>Accuracy</strong> ' |
493 "and <strong>F1</strong> for overall performance.</li>" | 493 'and <strong>F1</strong> for overall performance.</li>' |
494 " <li><strong>Classification (Imbalanced Data):</strong> Use <strong>Precision</strong>, " | 494 ' <li><strong>Classification (Imbalanced Data):</strong> Use <strong>Precision</strong>, ' |
495 "<strong>Recall</strong>, and <strong>ROC-AUC</strong> to focus on minority class " | 495 '<strong>Recall</strong>, and <strong>ROC-AUC</strong> to focus on minority class ' |
496 "performance.</li>" | 496 'performance.</li>' |
497 " <li><strong>Multilabel or Imbalanced Classification:</strong> Use " | 497 ' <li><strong>Multilabel or Imbalanced Classification:</strong> Use ' |
498 "<strong>Micro Precision/Recall/F1</strong> or <strong>Micro ROC-AUC</strong>.</li>" | 498 '<strong>Micro Precision/Recall/F1</strong> or <strong>Micro ROC-AUC</strong>.</li>' |
499 " <li><strong>Balanced Multiclass:</strong> Use <strong>Macro Precision/Recall/F1</strong> " | 499 ' <li><strong>Balanced Multiclass:</strong> Use <strong>Macro Precision/Recall/F1</strong> ' |
500 "or <strong>Macro ROC-AUC</strong>.</li>" | 500 'or <strong>Macro ROC-AUC</strong>.</li>' |
501 " <li><strong>Class Frequency Matters:</strong> Use <strong>Weighted Precision/Recall/F1</strong> " | 501 ' <li><strong>Class Frequency Matters:</strong> Use <strong>Weighted Precision/Recall/F1</strong> ' |
502 "to account for class imbalance.</li>" | 502 'to account for class imbalance.</li>' |
503 " <li><strong>Recommendation/Ranking:</strong> Use <strong>Hits at K</strong> for retrieval tasks.</li>" | 503 ' <li><strong>Recommendation/Ranking:</strong> Use <strong>Hits at K</strong> for retrieval tasks.</li>' |
504 " <li><strong>Detailed Analysis:</strong> Use <strong>Confusion Matrix stats</strong> " | 504 ' <li><strong>Detailed Analysis:</strong> Use <strong>Confusion Matrix stats</strong> ' |
505 "for class-wise performance in classification.</li>" | 505 'for class-wise performance in classification.</li>' |
506 " </ul>" | 506 ' </ul>' |
507 " </div>" | 507 ' </div>' |
508 " </div>" | 508 ' </div>' |
509 "</div>" | 509 '</div>' |
510 ) | 510 ) |
511 | 511 |
512 modal_js = ( | 512 modal_js = ( |
513 "<script>" | 513 "<script>" |
514 "document.addEventListener('DOMContentLoaded', function() {" | 514 "document.addEventListener('DOMContentLoaded', function() {" |