Mercurial > repos > goeckslab > image_learner
comparison utils.py @ 11:c5150cceab47 draft default tip
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
| author | goeckslab | 
|---|---|
| date | Sat, 18 Oct 2025 03:17:09 +0000 | 
| parents | b0d893d04d4c | 
| children | |
Comparison legend:
- equal
- deleted
- inserted
- replaced
| 10:b0d893d04d4c | 11:c5150cceab47 | 
|---|---|
| 102 table.performance-summary th.sortable.sorted-desc::after { content: '↓'; color: #ffffff; } | 102 table.performance-summary th.sortable.sorted-desc::after { content: '↓'; color: #ffffff; } | 
| 103 | 103 | 
| 104 /* show ~30 rows with a scrollbar (tweak if you want) */ | 104 /* show ~30 rows with a scrollbar (tweak if you want) */ | 
| 105 .scroll-rows-30 { | 105 .scroll-rows-30 { | 
| 106 max-height: 900px; /* ~30 rows depending on row height */ | 106 max-height: 900px; /* ~30 rows depending on row height */ | 
| 107 overflow-y: auto; /* vertical scrollbar (“sidebar”) */ | 107 overflow-y: auto; /* vertical scrollbar ("sidebar") */ | 
| 108 overflow-x: auto; | 108 overflow-x: auto; | 
| 109 } | 109 } | 
| 110 | 110 | 
| 111 /* Tabs + Help button (used by build_tabbed_html) */ | 111 /* Tabs + Help button (used by build_tabbed_html) */ | 
| 112 .tabs { | 112 .tabs { | 
| 210 if (!isNaN(n1) && !isNaN(n2)) return asc ? n1 - n2 : n2 - n1; // numeric | 210 if (!isNaN(n1) && !isNaN(n2)) return asc ? n1 - n2 : n2 - n1; // numeric | 
| 211 return asc ? v1.localeCompare(v2) : v2.localeCompare(v1); // lexical | 211 return asc ? v1.localeCompare(v2) : v2.localeCompare(v1); // lexical | 
| 212 }; | 212 }; | 
| 213 | 213 | 
| 214 document.querySelectorAll('table.performance-summary th.sortable').forEach(th => { | 214 document.querySelectorAll('table.performance-summary th.sortable').forEach(th => { | 
| 215 // initialize to “none” | 215 // initialize to "none" | 
| 216 th.classList.remove('sorted-asc','sorted-desc'); | 216 th.classList.remove('sorted-asc','sorted-desc'); | 
| 217 th.classList.add('sorted-none'); | 217 th.classList.add('sorted-none'); | 
| 218 | 218 | 
| 219 th.addEventListener('click', () => { | 219 th.addEventListener('click', () => { | 
| 220 const table = th.closest('table'); | 220 const table = th.closest('table'); | 
| 392 '<div id="metricsHelpModal" class="modal">' | 392 '<div id="metricsHelpModal" class="modal">' | 
| 393 ' <div class="modal-content">' | 393 ' <div class="modal-content">' | 
| 394 ' <span class="close">×</span>' | 394 ' <span class="close">×</span>' | 
| 395 " <h2>Model Evaluation Metrics — Help Guide</h2>" | 395 " <h2>Model Evaluation Metrics — Help Guide</h2>" | 
| 396 ' <div class="metrics-guide">' | 396 ' <div class="metrics-guide">' | 
| 397 " <h3>1) General Metrics (Regression and Classification)</h3>" | 397 ' <h3>1) General Metrics (Regression and Classification)</h3>' | 
| 398 " <p><strong>Loss (Regression & Classification):</strong> " | 398 ' <p><strong>Loss (Regression & Classification):</strong> ' | 
| 399 "Measures the difference between predicted and actual values, " | 399 'Measures the difference between predicted and actual values, ' | 
| 400 "optimized during training. Lower is better. " | 400 'optimized during training. Lower is better. ' | 
| 401 "For regression, this is often Mean Squared Error (MSE) or " | 401 'For regression, this is often Mean Squared Error (MSE) or ' | 
| 402 "Mean Absolute Error (MAE). For classification, it’s typically " | 402 'Mean Absolute Error (MAE). For classification, it\'s typically ' | 
| 403 "cross-entropy or log loss.</p>" | 403 'cross-entropy or log loss.</p>' | 
| 404 " <h3>2) Regression Metrics</h3>" | 404 ' <h3>2) Regression Metrics</h3>' | 
| 405 " <p><strong>Mean Absolute Error (MAE):</strong> " | 405 ' <p><strong>Mean Absolute Error (MAE):</strong> ' | 
| 406 "Average of absolute differences between predicted and actual values, " | 406 'Average of absolute differences between predicted and actual values, ' | 
| 407 "in the same units as the target. Use for interpretable error measurement " | 407 'in the same units as the target. Use for interpretable error measurement ' | 
| 408 "when all errors are equally important. Less sensitive to outliers than MSE.</p>" | 408 'when all errors are equally important. Less sensitive to outliers than MSE.</p>' | 
| 409 " <p><strong>Mean Squared Error (MSE):</strong> " | 409 ' <p><strong>Mean Squared Error (MSE):</strong> ' | 
| 410 "Average of squared differences between predicted and actual values. " | 410 'Average of squared differences between predicted and actual values. ' | 
| 411 "Penalizes larger errors more heavily, useful when large deviations are critical. " | 411 'Penalizes larger errors more heavily, useful when large deviations are critical. ' | 
| 412 "Often used as the loss function in regression.</p>" | 412 'Often used as the loss function in regression.</p>' | 
| 413 " <p><strong>Root Mean Squared Error (RMSE):</strong> " | 413 ' <p><strong>Root Mean Squared Error (RMSE):</strong> ' | 
| 414 "Square root of MSE, in the same units as the target. " | 414 'Square root of MSE, in the same units as the target. ' | 
| 415 "Balances interpretability and sensitivity to large errors. " | 415 'Balances interpretability and sensitivity to large errors. ' | 
| 416 "Widely used for regression evaluation.</p>" | 416 'Widely used for regression evaluation.</p>' | 
| 417 " <p><strong>Mean Absolute Percentage Error (MAPE):</strong> " | 417 ' <p><strong>Mean Absolute Percentage Error (MAPE):</strong> ' | 
| 418 "Average absolute error as a percentage of actual values. " | 418 'Average absolute error as a percentage of actual values. ' | 
| 419 "Scale-independent, ideal for comparing relative errors across datasets. " | 419 'Scale-independent, ideal for comparing relative errors across datasets. ' | 
| 420 "Avoid when actual values are near zero.</p>" | 420 'Avoid when actual values are near zero.</p>' | 
| 421 " <p><strong>Root Mean Squared Percentage Error (RMSPE):</strong> " | 421 ' <p><strong>Root Mean Squared Percentage Error (RMSPE):</strong> ' | 
| 422 "Square root of mean squared percentage error. Scale-independent, " | 422 'Square root of mean squared percentage error. Scale-independent, ' | 
| 423 "penalizes larger relative errors more than MAPE. Use for forecasting " | 423 'penalizes larger relative errors more than MAPE. Use for forecasting ' | 
| 424 "or when relative accuracy matters.</p>" | 424 'or when relative accuracy matters.</p>' | 
| 425 " <p><strong>R² Score:</strong> Proportion of variance in the target " | 425 ' <p><strong>R² Score:</strong> Proportion of variance in the target ' | 
| 426 "explained by the model. Ranges from negative infinity to 1 (perfect prediction). " | 426 'explained by the model. Ranges from negative infinity to 1 (perfect prediction). ' | 
| 427 "Use to assess model fit; negative values indicate poor performance " | 427 'Use to assess model fit; negative values indicate poor performance ' | 
| 428 "compared to predicting the mean.</p>" | 428 'compared to predicting the mean.</p>' | 
| 429 " <h3>3) Classification Metrics</h3>" | 429 ' <h3>3) Classification Metrics</h3>' | 
| 430 " <p><strong>Accuracy:</strong> Proportion of correct predictions " | 430 ' <p><strong>Accuracy:</strong> Proportion of correct predictions ' | 
| 431 "among all predictions. Simple but misleading for imbalanced datasets, " | 431 'among all predictions. Simple but misleading for imbalanced datasets, ' | 
| 432 "where high accuracy may hide poor performance on minority classes.</p>" | 432 'where high accuracy may hide poor performance on minority classes.</p>' | 
| 433 " <p><strong>Micro Accuracy:</strong> Sums true positives and true negatives " | 433 ' <p><strong>Micro Accuracy:</strong> Sums true positives and true negatives ' | 
| 434 "across all classes before computing accuracy. Suitable for multiclass or " | 434 'across all classes before computing accuracy. Suitable for multiclass or ' | 
| 435 "multilabel problems with imbalanced data.</p>" | 435 'multilabel problems with imbalanced data.</p>' | 
| 436 " <p><strong>Token Accuracy:</strong> Measures how often predicted tokens " | 436 ' <p><strong>Token Accuracy:</strong> Measures how often predicted tokens ' | 
| 437 "(e.g., in sequences) match true tokens. Common in NLP tasks like text generation " | 437 '(e.g., in sequences) match true tokens. Common in NLP tasks like text generation ' | 
| 438 "or token classification.</p>" | 438 'or token classification.</p>' | 
| 439 " <p><strong>Precision:</strong> Proportion of positive predictions that are " | 439 ' <p><strong>Precision:</strong> Proportion of positive predictions that are ' | 
| 440 "correct (TP / (TP + FP)). Use when false positives are costly, e.g., spam detection.</p>" | 440 'correct (TP / (TP + FP)). Use when false positives are costly, e.g., spam detection.</p>' | 
| 441 " <p><strong>Recall (Sensitivity):</strong> Proportion of actual positives " | 441 ' <p><strong>Recall (Sensitivity):</strong> Proportion of actual positives ' | 
| 442 "correctly predicted (TP / (TP + FN)). Use when missing positives is risky, " | 442 'correctly predicted (TP / (TP + FN)). Use when missing positives is risky, ' | 
| 443 "e.g., disease detection.</p>" | 443 'e.g., disease detection.</p>' | 
| 444 " <p><strong>Specificity:</strong> True negative rate (TN / (TN + FP)). " | 444 ' <p><strong>Specificity:</strong> True negative rate (TN / (TN + FP)). ' | 
| 445 "Measures ability to identify negatives. Useful in medical testing to avoid " | 445 'Measures ability to identify negatives. Useful in medical testing to avoid ' | 
| 446 "false alarms.</p>" | 446 'false alarms.</p>' | 
| 447 " <h3>4) Classification: Macro, Micro, and Weighted Averages</h3>" | 447 ' <h3>4) Classification: Macro, Micro, and Weighted Averages</h3>' | 
| 448 " <p><strong>Macro Precision / Recall / F1:</strong> Averages the metric " | 448 ' <p><strong>Macro Precision / Recall / F1:</strong> Averages the metric ' | 
| 449 "across all classes, treating each equally. Best for balanced datasets where " | 449 'across all classes, treating each equally. Best for balanced datasets where ' | 
| 450 "all classes are equally important.</p>" | 450 'all classes are equally important.</p>' | 
| 451 " <p><strong>Micro Precision / Recall / F1:</strong> Aggregates true positives, " | 451 ' <p><strong>Micro Precision / Recall / F1:</strong> Aggregates true positives, ' | 
| 452 "false positives, and false negatives across all classes before computing. " | 452 'false positives, and false negatives across all classes before computing. ' | 
| 453 "Ideal for imbalanced or multilabel classification.</p>" | 453 'Ideal for imbalanced or multilabel classification.</p>' | 
| 454 " <p><strong>Weighted Precision / Recall / F1:</strong> Averages metrics " | 454 ' <p><strong>Weighted Precision / Recall / F1:</strong> Averages metrics ' | 
| 455 "across classes, weighted by the number of true instances per class. Balances " | 455 'across classes, weighted by the number of true instances per class. Balances ' | 
| 456 "class importance based on frequency.</p>" | 456 'class importance based on frequency.</p>' | 
| 457 " <h3>5) Classification: Average Precision (PR-AUC Variants)</h3>" | 457 ' <h3>5) Classification: Average Precision (PR-AUC Variants)</h3>' | 
| 458 " <p><strong>Average Precision Macro:</strong> Precision-Recall AUC averaged " | 458 ' <p><strong>Average Precision Macro:</strong> Precision-Recall AUC averaged ' | 
| 459 "equally across classes. Use for balanced multiclass problems.</p>" | 459 'equally across classes. Use for balanced multiclass problems.</p>' | 
| 460 " <p><strong>Average Precision Micro:</strong> Global Precision-Recall AUC " | 460 ' <p><strong>Average Precision Micro:</strong> Global Precision-Recall AUC ' | 
| 461 "using all instances. Best for imbalanced or multilabel classification.</p>" | 461 'using all instances. Best for imbalanced or multilabel classification.</p>' | 
| 462 " <p><strong>Average Precision Samples:</strong> Precision-Recall AUC averaged " | 462 ' <p><strong>Average Precision Samples:</strong> Precision-Recall AUC averaged ' | 
| 463 "across individual samples. Ideal for multilabel tasks where samples have multiple " | 463 'across individual samples. Ideal for multilabel tasks where samples have multiple ' | 
| 464 "labels.</p>" | 464 'labels.</p>' | 
| 465 " <h3>6) Classification: ROC-AUC Variants</h3>" | 465 ' <h3>6) Classification: ROC-AUC Variants</h3>' | 
| 466 " <p><strong>ROC-AUC:</strong> Measures ability to distinguish between classes. " | 466 ' <p><strong>ROC-AUC:</strong> Measures ability to distinguish between classes. ' | 
| 467 "AUC = 1 is perfect; 0.5 is random guessing. Use for binary classification.</p>" | 467 'AUC = 1 is perfect; 0.5 is random guessing. Use for binary classification.</p>' | 
| 468 " <p><strong>Macro ROC-AUC:</strong> Averages AUC across all classes equally. " | 468 ' <p><strong>Macro ROC-AUC:</strong> Averages AUC across all classes equally. ' | 
| 469 "Suitable for balanced multiclass problems.</p>" | 469 'Suitable for balanced multiclass problems.</p>' | 
| 470 " <p><strong>Micro ROC-AUC:</strong> Computes AUC from aggregated predictions " | 470 ' <p><strong>Micro ROC-AUC:</strong> Computes AUC from aggregated predictions ' | 
| 471 "across all classes. Useful for imbalanced or multilabel settings.</p>" | 471 'across all classes. Useful for imbalanced or multilabel settings.</p>' | 
| 472 " <h3>7) Classification: Confusion Matrix Stats (Per Class)</h3>" | 472 ' <h3>7) Classification: Confusion Matrix Stats (Per Class)</h3>' | 
| 473 " <p><strong>True Positives / Negatives (TP / TN):</strong> Correct predictions " | 473 ' <p><strong>True Positives / Negatives (TP / TN):</strong> Correct predictions ' | 
| 474 "for positives and negatives, respectively.</p>" | 474 'for positives and negatives, respectively.</p>' | 
| 475 " <p><strong>False Positives / Negatives (FP / FN):</strong> Incorrect predictions " | 475 ' <p><strong>False Positives / Negatives (FP / FN):</strong> Incorrect predictions ' | 
| 476 "— false alarms and missed detections.</p>" | 476 '— false alarms and missed detections.</p>' | 
| 477 " <h3>8) Classification: Ranking Metrics</h3>" | 477 ' <h3>8) Classification: Ranking Metrics</h3>' | 
| 478 " <p><strong>Hits at K:</strong> Measures whether the true label is among the " | 478 ' <p><strong>Hits at K:</strong> Measures whether the true label is among the ' | 
| 479 "top-K predictions. Common in recommendation systems and retrieval tasks.</p>" | 479 'top-K predictions. Common in recommendation systems and retrieval tasks.</p>' | 
| 480 " <h3>9) Other Metrics (Classification)</h3>" | 480 ' <h3>9) Other Metrics (Classification)</h3>' | 
| 481 " <p><strong>Cohen's Kappa:</strong> Measures agreement between predicted and " | 481 ' <p><strong>Cohen\'s Kappa:</strong> Measures agreement between predicted and ' | 
| 482 "actual labels, adjusted for chance. Useful for multiclass classification with " | 482 'actual labels, adjusted for chance. Useful for multiclass classification with ' | 
| 483 "imbalanced data.</p>" | 483 'imbalanced data.</p>' | 
| 484 " <p><strong>Matthews Correlation Coefficient (MCC):</strong> Balanced measure " | 484 ' <p><strong>Matthews Correlation Coefficient (MCC):</strong> Balanced measure ' | 
| 485 "using TP, TN, FP, and FN. Effective for imbalanced datasets.</p>" | 485 'using TP, TN, FP, and FN. Effective for imbalanced datasets.</p>' | 
| 486 " <h3>10) Metric Recommendations</h3>" | 486 ' <h3>10) Metric Recommendations</h3>' | 
| 487 " <ul>" | 487 ' <ul>' | 
| 488 " <li><strong>Regression:</strong> Use <strong>RMSE</strong> or " | 488 ' <li><strong>Regression:</strong> Use <strong>RMSE</strong> or ' | 
| 489 "<strong>MAE</strong> for general evaluation, <strong>MAPE</strong> for relative " | 489 '<strong>MAE</strong> for general evaluation, <strong>MAPE</strong> for relative ' | 
| 490 "errors, and <strong>R²</strong> to assess model fit. Use <strong>MSE</strong> or " | 490 'errors, and <strong>R²</strong> to assess model fit. Use <strong>MSE</strong> or ' | 
| 491 "<strong>RMSPE</strong> when large errors are critical.</li>" | 491 '<strong>RMSPE</strong> when large errors are critical.</li>' | 
| 492 " <li><strong>Classification (Balanced Data):</strong> Use <strong>Accuracy</strong> " | 492 ' <li><strong>Classification (Balanced Data):</strong> Use <strong>Accuracy</strong> ' | 
| 493 "and <strong>F1</strong> for overall performance.</li>" | 493 'and <strong>F1</strong> for overall performance.</li>' | 
| 494 " <li><strong>Classification (Imbalanced Data):</strong> Use <strong>Precision</strong>, " | 494 ' <li><strong>Classification (Imbalanced Data):</strong> Use <strong>Precision</strong>, ' | 
| 495 "<strong>Recall</strong>, and <strong>ROC-AUC</strong> to focus on minority class " | 495 '<strong>Recall</strong>, and <strong>ROC-AUC</strong> to focus on minority class ' | 
| 496 "performance.</li>" | 496 'performance.</li>' | 
| 497 " <li><strong>Multilabel or Imbalanced Classification:</strong> Use " | 497 ' <li><strong>Multilabel or Imbalanced Classification:</strong> Use ' | 
| 498 "<strong>Micro Precision/Recall/F1</strong> or <strong>Micro ROC-AUC</strong>.</li>" | 498 '<strong>Micro Precision/Recall/F1</strong> or <strong>Micro ROC-AUC</strong>.</li>' | 
| 499 " <li><strong>Balanced Multiclass:</strong> Use <strong>Macro Precision/Recall/F1</strong> " | 499 ' <li><strong>Balanced Multiclass:</strong> Use <strong>Macro Precision/Recall/F1</strong> ' | 
| 500 "or <strong>Macro ROC-AUC</strong>.</li>" | 500 'or <strong>Macro ROC-AUC</strong>.</li>' | 
| 501 " <li><strong>Class Frequency Matters:</strong> Use <strong>Weighted Precision/Recall/F1</strong> " | 501 ' <li><strong>Class Frequency Matters:</strong> Use <strong>Weighted Precision/Recall/F1</strong> ' | 
| 502 "to account for class imbalance.</li>" | 502 'to account for class imbalance.</li>' | 
| 503 " <li><strong>Recommendation/Ranking:</strong> Use <strong>Hits at K</strong> for retrieval tasks.</li>" | 503 ' <li><strong>Recommendation/Ranking:</strong> Use <strong>Hits at K</strong> for retrieval tasks.</li>' | 
| 504 " <li><strong>Detailed Analysis:</strong> Use <strong>Confusion Matrix stats</strong> " | 504 ' <li><strong>Detailed Analysis:</strong> Use <strong>Confusion Matrix stats</strong> ' | 
| 505 "for class-wise performance in classification.</li>" | 505 'for class-wise performance in classification.</li>' | 
| 506 " </ul>" | 506 ' </ul>' | 
| 507 " </div>" | 507 ' </div>' | 
| 508 " </div>" | 508 ' </div>' | 
| 509 "</div>" | 509 '</div>' | 
| 510 ) | 510 ) | 
| 511 | 511 | 
| 512 modal_js = ( | 512 modal_js = ( | 
| 513 "<script>" | 513 "<script>" | 
| 514 "document.addEventListener('DOMContentLoaded', function() {" | 514 "document.addEventListener('DOMContentLoaded', function() {" | 
