goeckslab / image_learner: comparison of image_learner_cli.py @ 9:9e912fce264c (draft default tip)
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
author: goeckslab
date: Wed, 27 Aug 2025 21:02:48 +0000
parents: 85e6f4b2ad18
children: (none)
8:85e6f4b2ad18 | 9:9e912fce264c |
19 METRIC_DISPLAY_NAMES, | 19 METRIC_DISPLAY_NAMES, |
20 MODEL_ENCODER_TEMPLATES, | 20 MODEL_ENCODER_TEMPLATES, |
21 SPLIT_COLUMN_NAME, | 21 SPLIT_COLUMN_NAME, |
22 TEMP_CONFIG_FILENAME, | 22 TEMP_CONFIG_FILENAME, |
23 TEMP_CSV_FILENAME, | 23 TEMP_CSV_FILENAME, |
24 TEMP_DIR_PREFIX | 24 TEMP_DIR_PREFIX, |
25 ) | 25 ) |
26 from ludwig.globals import ( | 26 from ludwig.globals import ( |
27 DESCRIPTION_FILE_NAME, | 27 DESCRIPTION_FILE_NAME, |
28 PREDICTIONS_PARQUET_FILE_NAME, | 28 PREDICTIONS_PARQUET_FILE_NAME, |
29 TEST_STATISTICS_FILE_NAME, | 29 TEST_STATISTICS_FILE_NAME, |
36 from utils import ( | 36 from utils import ( |
37 build_tabbed_html, | 37 build_tabbed_html, |
38 encode_image_to_base64, | 38 encode_image_to_base64, |
39 get_html_closing, | 39 get_html_closing, |
40 get_html_template, | 40 get_html_template, |
41 get_metrics_help_modal | 41 get_metrics_help_modal, |
42 ) | 42 ) |
43 | 43 |
44 # --- Logging Setup --- | 44 # --- Logging Setup --- |
45 logging.basicConfig( | 45 logging.basicConfig( |
46 level=logging.INFO, | 46 level=logging.INFO, |
47 format='%(asctime)s %(levelname)s %(name)s: %(message)s', | 47 format="%(asctime)s %(levelname)s %(name)s: %(message)s", |
48 ) | 48 ) |
49 logger = logging.getLogger("ImageLearner") | 49 logger = logging.getLogger("ImageLearner") |
50 | 50 |
51 | 51 |
52 def format_config_table_html( | 52 def format_config_table_html( |
65 "learning_rate", | 65 "learning_rate", |
66 "random_seed", | 66 "random_seed", |
67 "early_stop", | 67 "early_stop", |
68 "threshold", | 68 "threshold", |
69 ] | 69 ] |
70 | |
70 rows = [] | 71 rows = [] |
72 | |
71 for key in display_keys: | 73 for key in display_keys: |
72 val = config.get(key, None) | 74 val = config.get(key, None) |
73 if key == "threshold": | 75 if key == "threshold": |
74 if output_type != "binary": | 76 if output_type != "binary": |
75 continue | 77 continue |
132 ) | 134 ) |
133 else: | 135 else: |
134 val_str = val | 136 val_str = val |
135 else: | 137 else: |
136 val_str = val if val is not None else "N/A" | 138 val_str = val if val is not None else "N/A" |
137 if val_str == "N/A" and key not in ["task_type"]: # Skip if N/A for non-essential | 139 if val_str == "N/A" and key not in [ |
140 "task_type" | |
141 ]: # Skip if N/A for non-essential | |
138 continue | 142 continue |
139 rows.append( | 143 rows.append( |
140 f"<tr>" | 144 f"<tr>" |
141 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left;'>" | 145 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left;'>" |
142 f"{key.replace('_', ' ').title()}</td>" | 146 f"{key.replace('_', ' ').title()}</td>" |
164 <thead><tr> | 168 <thead><tr> |
165 <th style="padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Parameter</th> | 169 <th style="padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Parameter</th> |
166 <th style="padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Value</th> | 170 <th style="padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Value</th> |
167 </tr></thead> | 171 </tr></thead> |
168 <tbody> | 172 <tbody> |
169 {''.join(rows)} | 173 {"".join(rows)} |
170 </tbody> | 174 </tbody> |
171 </table> | 175 </table> |
172 </div><br> | 176 </div><br> |
173 <p style="text-align: center; font-size: 0.9em;"> | 177 <p style="text-align: center; font-size: 0.9em;"> |
174 Model trained using <a href="https://ludwig.ai/" target="_blank" rel="noopener noreferrer">Ludwig</a>. | 178 Model trained using <a href="https://ludwig.ai/" target="_blank" rel="noopener noreferrer">Ludwig</a>. |
249 "accuracy_micro": get_last_value(label_stats, "accuracy_micro"), | 253 "accuracy_micro": get_last_value(label_stats, "accuracy_micro"), |
250 "loss": get_last_value(label_stats, "loss"), | 254 "loss": get_last_value(label_stats, "loss"), |
251 "roc_auc": get_last_value(label_stats, "roc_auc"), | 255 "roc_auc": get_last_value(label_stats, "roc_auc"), |
252 "hits_at_k": get_last_value(label_stats, "hits_at_k"), | 256 "hits_at_k": get_last_value(label_stats, "hits_at_k"), |
253 } | 257 } |
258 | |
254 # Test metrics: dynamic extraction according to exclusions | 259 # Test metrics: dynamic extraction according to exclusions |
255 test_label_stats = test_stats.get("label", {}) | 260 test_label_stats = test_stats.get("label", {}) |
256 if not test_label_stats: | 261 if not test_label_stats: |
257 logging.warning("No label statistics found for test split") | 262 logging.warning("No label statistics found for test split") |
258 else: | 263 else: |
259 combined_stats = test_stats.get("combined", {}) | 264 combined_stats = test_stats.get("combined", {}) |
260 overall_stats = test_label_stats.get("overall_stats", {}) | 265 overall_stats = test_label_stats.get("overall_stats", {}) |
266 | |
261 # Define exclusions | 267 # Define exclusions |
262 if output_type == "binary": | 268 if output_type == "binary": |
263 exclude = {"per_class_stats", "precision_recall_curve", "roc_curve"} | 269 exclude = {"per_class_stats", "precision_recall_curve", "roc_curve"} |
264 else: | 270 else: |
265 exclude = {"per_class_stats", "confusion_matrix"} | 271 exclude = {"per_class_stats", "confusion_matrix"} |
272 | |
266 # 1. Get all scalar test_label_stats not excluded | 273 # 1. Get all scalar test_label_stats not excluded |
267 test_metrics = {} | 274 test_metrics = {} |
268 for k, v in test_label_stats.items(): | 275 for k, v in test_label_stats.items(): |
269 if k in exclude: | 276 if k in exclude: |
270 continue | 277 continue |
271 if k == "overall_stats": | 278 if k == "overall_stats": |
272 continue | 279 continue |
273 if isinstance(v, (int, float, str, bool)): | 280 if isinstance(v, (int, float, str, bool)): |
274 test_metrics[k] = v | 281 test_metrics[k] = v |
282 | |
275 # 2. Add overall_stats (flattened) | 283 # 2. Add overall_stats (flattened) |
276 for k, v in overall_stats.items(): | 284 for k, v in overall_stats.items(): |
277 test_metrics[k] = v | 285 test_metrics[k] = v |
286 | |
278 # 3. Optionally include combined/loss if present and not already | 287 # 3. Optionally include combined/loss if present and not already |
279 if "loss" in combined_stats and "loss" not in test_metrics: | 288 if "loss" in combined_stats and "loss" not in test_metrics: |
280 test_metrics["loss"] = combined_stats["loss"] | 289 test_metrics["loss"] = combined_stats["loss"] |
281 metrics["test"] = test_metrics | 290 metrics["test"] = test_metrics |
282 return metrics | 291 return metrics |
313 t = all_metrics["training"].get(metric_key) | 322 t = all_metrics["training"].get(metric_key) |
314 v = all_metrics["validation"].get(metric_key) | 323 v = all_metrics["validation"].get(metric_key) |
315 te = all_metrics["test"].get(metric_key) | 324 te = all_metrics["test"].get(metric_key) |
316 if all(x is not None for x in [t, v, te]): | 325 if all(x is not None for x in [t, v, te]): |
317 rows.append([display_name, f"{t:.4f}", f"{v:.4f}", f"{te:.4f}"]) | 326 rows.append([display_name, f"{t:.4f}", f"{v:.4f}", f"{te:.4f}"]) |
327 | |
318 if not rows: | 328 if not rows: |
319 return "<table><tr><td>No metric values found.</td></tr></table>" | 329 return "<table><tr><td>No metric values found.</td></tr></table>" |
330 | |
320 html = ( | 331 html = ( |
321 "<h2 style='text-align: center;'>Model Performance Summary</h2>" | 332 "<h2 style='text-align: center;'>Model Performance Summary</h2>" |
322 "<div style='display: flex; justify-content: center;'>" | 333 "<div style='display: flex; justify-content: center;'>" |
323 "<table class='performance-summary' style='border-collapse: collapse;'>" | 334 "<table class='performance-summary' style='border-collapse: collapse;'>" |
324 "<thead><tr>" | 335 "<thead><tr>" |
329 "</tr></thead><tbody>" | 340 "</tr></thead><tbody>" |
330 ) | 341 ) |
331 for row in rows: | 342 for row in rows: |
332 html += generate_table_row( | 343 html += generate_table_row( |
333 row, | 344 row, |
334 "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;" | 345 "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;", |
335 ) | 346 ) |
336 html += "</tbody></table></div><br>" | 347 html += "</tbody></table></div><br>" |
337 return html | 348 return html |
338 | 349 |
339 | 350 |
355 ) | 366 ) |
356 t = all_metrics["training"].get(metric_key) | 367 t = all_metrics["training"].get(metric_key) |
357 v = all_metrics["validation"].get(metric_key) | 368 v = all_metrics["validation"].get(metric_key) |
358 if t is not None and v is not None: | 369 if t is not None and v is not None: |
359 rows.append([display_name, f"{t:.4f}", f"{v:.4f}"]) | 370 rows.append([display_name, f"{t:.4f}", f"{v:.4f}"]) |
371 | |
360 if not rows: | 372 if not rows: |
361 return "<table><tr><td>No metric values found for Train/Validation.</td></tr></table>" | 373 return "<table><tr><td>No metric values found for Train/Validation.</td></tr></table>" |
374 | |
362 html = ( | 375 html = ( |
363 "<h2 style='text-align: center;'>Train/Validation Performance Summary</h2>" | 376 "<h2 style='text-align: center;'>Train/Validation Performance Summary</h2>" |
364 "<div style='display: flex; justify-content: center;'>" | 377 "<div style='display: flex; justify-content: center;'>" |
365 "<table class='performance-summary' style='border-collapse: collapse;'>" | 378 "<table class='performance-summary' style='border-collapse: collapse;'>" |
366 "<thead><tr>" | 379 "<thead><tr>" |
370 "</tr></thead><tbody>" | 383 "</tr></thead><tbody>" |
371 ) | 384 ) |
372 for row in rows: | 385 for row in rows: |
373 html += generate_table_row( | 386 html += generate_table_row( |
374 row, | 387 row, |
375 "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;" | 388 "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;", |
376 ) | 389 ) |
377 html += "</tbody></table></div><br>" | 390 html += "</tbody></table></div><br>" |
378 return html | 391 return html |
379 | 392 |
380 | 393 |
391 for key in sorted(test_metrics.keys()): | 404 for key in sorted(test_metrics.keys()): |
392 display_name = METRIC_DISPLAY_NAMES.get(key, key.replace("_", " ").title()) | 405 display_name = METRIC_DISPLAY_NAMES.get(key, key.replace("_", " ").title()) |
393 value = test_metrics[key] | 406 value = test_metrics[key] |
394 if value is not None: | 407 if value is not None: |
395 rows.append([display_name, f"{value:.4f}"]) | 408 rows.append([display_name, f"{value:.4f}"]) |
409 | |
396 if not rows: | 410 if not rows: |
397 return "<table><tr><td>No test metric values found.</td></tr></table>" | 411 return "<table><tr><td>No test metric values found.</td></tr></table>" |
412 | |
398 html = ( | 413 html = ( |
399 "<h2 style='text-align: center;'>Test Performance Summary</h2>" | 414 "<h2 style='text-align: center;'>Test Performance Summary</h2>" |
400 "<div style='display: flex; justify-content: center;'>" | 415 "<div style='display: flex; justify-content: center;'>" |
401 "<table class='performance-summary' style='border-collapse: collapse;'>" | 416 "<table class='performance-summary' style='border-collapse: collapse;'>" |
402 "<thead><tr>" | 417 "<thead><tr>" |
405 "</tr></thead><tbody>" | 420 "</tr></thead><tbody>" |
406 ) | 421 ) |
407 for row in rows: | 422 for row in rows: |
408 html += generate_table_row( | 423 html += generate_table_row( |
409 row, | 424 row, |
410 "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;" | 425 "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;", |
411 ) | 426 ) |
412 html += "</tbody></table></div><br>" | 427 html += "</tbody></table></div><br>" |
413 return html | 428 return html |
414 | 429 |
415 | 430 |
434 if label_counts.size > 1: | 449 if label_counts.size > 1: |
435 # Force stratify even with fewer samples - adjust validation_size if needed | 450 # Force stratify even with fewer samples - adjust validation_size if needed |
436 min_samples_per_class = label_counts.min() | 451 min_samples_per_class = label_counts.min() |
437 if min_samples_per_class * validation_size < 1: | 452 if min_samples_per_class * validation_size < 1: |
438 # Adjust validation_size to ensure at least 1 sample per class, but do not exceed original validation_size | 453 # Adjust validation_size to ensure at least 1 sample per class, but do not exceed original validation_size |
439 adjusted_validation_size = min(validation_size, 1.0 / min_samples_per_class) | 454 adjusted_validation_size = min( |
455 validation_size, 1.0 / min_samples_per_class | |
456 ) | |
440 if adjusted_validation_size != validation_size: | 457 if adjusted_validation_size != validation_size: |
441 validation_size = adjusted_validation_size | 458 validation_size = adjusted_validation_size |
442 logger.info(f"Adjusted validation_size to {validation_size:.3f} to ensure at least one sample per class in validation") | 459 logger.info( |
460 f"Adjusted validation_size to {validation_size:.3f} to ensure at least one sample per class in validation" | |
461 ) | |
443 stratify_arr = out.loc[idx_train, label_column] | 462 stratify_arr = out.loc[idx_train, label_column] |
444 logger.info("Using stratified split for validation set") | 463 logger.info("Using stratified split for validation set") |
445 else: | 464 else: |
446 logger.warning("Only one label class found; cannot stratify") | 465 logger.warning("Only one label class found; cannot stratify") |
447 if validation_size <= 0: | 466 if validation_size <= 0: |
484 """Create a stratified random split when no split column exists.""" | 503 """Create a stratified random split when no split column exists.""" |
485 out = df.copy() | 504 out = df.copy() |
486 # initialize split column | 505 # initialize split column |
487 out[split_column] = 0 | 506 out[split_column] = 0 |
488 if not label_column or label_column not in out.columns: | 507 if not label_column or label_column not in out.columns: |
489 logger.warning("No label column found; using random split without stratification") | 508 logger.warning( |
509 "No label column found; using random split without stratification" | |
510 ) | |
490 # fall back to simple random assignment | 511 # fall back to simple random assignment |
491 indices = out.index.tolist() | 512 indices = out.index.tolist() |
492 np.random.seed(random_state) | 513 np.random.seed(random_state) |
493 np.random.shuffle(indices) | 514 np.random.shuffle(indices) |
494 n_total = len(indices) | 515 n_total = len(indices) |
527 test_size=split_probabilities[2], | 548 test_size=split_probabilities[2], |
528 random_state=random_state, | 549 random_state=random_state, |
529 stratify=out[label_column], | 550 stratify=out[label_column], |
530 ) | 551 ) |
531 # second split: separate training and validation from remaining data | 552 # second split: separate training and validation from remaining data |
532 val_size_adjusted = split_probabilities[1] / (split_probabilities[0] + split_probabilities[1]) | 553 val_size_adjusted = split_probabilities[1] / ( |
554 split_probabilities[0] + split_probabilities[1] | |
555 ) | |
533 train_idx, val_idx = train_test_split( | 556 train_idx, val_idx = train_test_split( |
534 train_val_idx, | 557 train_val_idx, |
535 test_size=val_size_adjusted, | 558 test_size=val_size_adjusted, |
536 random_state=random_state, | 559 random_state=random_state, |
537 stratify=out.loc[train_val_idx, label_column], | 560 stratify=out.loc[train_val_idx, label_column], |
539 # assign split values | 562 # assign split values |
540 out.loc[train_idx, split_column] = 0 | 563 out.loc[train_idx, split_column] = 0 |
541 out.loc[val_idx, split_column] = 1 | 564 out.loc[val_idx, split_column] = 1 |
542 out.loc[test_idx, split_column] = 2 | 565 out.loc[test_idx, split_column] = 2 |
543 logger.info("Successfully applied stratified random split") | 566 logger.info("Successfully applied stratified random split") |
544 logger.info(f"Split counts: Train={len(train_idx)}, Val={len(val_idx)}, Test={len(test_idx)}") | 567 logger.info( |
568 f"Split counts: Train={len(train_idx)}, Val={len(val_idx)}, Test={len(test_idx)}" | |
569 ) | |
545 return out.astype({split_column: int}) | 570 return out.astype({split_column: int}) |
546 | 571 |
547 | 572 |
548 class Backend(Protocol): | 573 class Backend(Protocol): |
549 """Interface for a machine learning backend.""" | 574 """Interface for a machine learning backend.""" |
575 | |
550 def prepare_config( | 576 def prepare_config( |
551 self, | 577 self, |
552 config_params: Dict[str, Any], | 578 config_params: Dict[str, Any], |
553 split_config: Dict[str, Any], | 579 split_config: Dict[str, Any], |
554 ) -> str: | 580 ) -> str: |
576 ... | 602 ... |
577 | 603 |
578 | 604 |
579 class LudwigDirectBackend: | 605 class LudwigDirectBackend: |
580 """Backend for running Ludwig experiments directly via the internal experiment_cli function.""" | 606 """Backend for running Ludwig experiments directly via the internal experiment_cli function.""" |
607 | |
581 def prepare_config( | 608 def prepare_config( |
582 self, | 609 self, |
583 config_params: Dict[str, Any], | 610 config_params: Dict[str, Any], |
584 split_config: Dict[str, Any], | 611 split_config: Dict[str, Any], |
585 ) -> str: | 612 ) -> str: |
586 logger.info("LudwigDirectBackend: Preparing YAML configuration.") | 613 logger.info("LudwigDirectBackend: Preparing YAML configuration.") |
614 | |
587 model_name = config_params.get("model_name", "resnet18") | 615 model_name = config_params.get("model_name", "resnet18") |
588 use_pretrained = config_params.get("use_pretrained", False) | 616 use_pretrained = config_params.get("use_pretrained", False) |
589 fine_tune = config_params.get("fine_tune", False) | 617 fine_tune = config_params.get("fine_tune", False) |
590 if use_pretrained: | 618 if use_pretrained: |
591 trainable = bool(fine_tune) | 619 trainable = bool(fine_tune) |
604 "use_pretrained": use_pretrained, | 632 "use_pretrained": use_pretrained, |
605 "trainable": trainable, | 633 "trainable": trainable, |
606 } | 634 } |
607 else: | 635 else: |
608 encoder_config = {"type": raw_encoder} | 636 encoder_config = {"type": raw_encoder} |
637 | |
609 batch_size_cfg = batch_size or "auto" | 638 batch_size_cfg = batch_size or "auto" |
639 | |
610 label_column_path = config_params.get("label_column_data_path") | 640 label_column_path = config_params.get("label_column_data_path") |
611 label_series = None | 641 label_series = None |
612 if label_column_path is not None and Path(label_column_path).exists(): | 642 if label_column_path is not None and Path(label_column_path).exists(): |
613 try: | 643 try: |
614 label_series = pd.read_csv(label_column_path)[LABEL_COLUMN_NAME] | 644 label_series = pd.read_csv(label_column_path)[LABEL_COLUMN_NAME] |
615 except Exception as e: | 645 except Exception as e: |
616 logger.warning(f"Could not read label column for task detection: {e}") | 646 logger.warning(f"Could not read label column for task detection: {e}") |
647 | |
617 if ( | 648 if ( |
618 label_series is not None | 649 label_series is not None |
619 and ptypes.is_numeric_dtype(label_series.dtype) | 650 and ptypes.is_numeric_dtype(label_series.dtype) |
620 and label_series.nunique() > 10 | 651 and label_series.nunique() > 10 |
621 ): | 652 ): |
622 task_type = "regression" | 653 task_type = "regression" |
623 else: | 654 else: |
624 task_type = "classification" | 655 task_type = "classification" |
656 | |
625 config_params["task_type"] = task_type | 657 config_params["task_type"] = task_type |
658 | |
626 image_feat: Dict[str, Any] = { | 659 image_feat: Dict[str, Any] = { |
627 "name": IMAGE_PATH_COLUMN_NAME, | 660 "name": IMAGE_PATH_COLUMN_NAME, |
628 "type": "image", | 661 "type": "image", |
629 "encoder": encoder_config, | 662 "encoder": encoder_config, |
630 } | 663 } |
631 if config_params.get("augmentation") is not None: | 664 if config_params.get("augmentation") is not None: |
632 image_feat["augmentation"] = config_params["augmentation"] | 665 image_feat["augmentation"] = config_params["augmentation"] |
666 | |
633 if task_type == "regression": | 667 if task_type == "regression": |
634 output_feat = { | 668 output_feat = { |
635 "name": LABEL_COLUMN_NAME, | 669 "name": LABEL_COLUMN_NAME, |
636 "type": "number", | 670 "type": "number", |
637 "decoder": {"type": "regressor"}, | 671 "decoder": {"type": "regressor"}, |
643 "r2", | 677 "r2", |
644 ] | 678 ] |
645 }, | 679 }, |
646 } | 680 } |
647 val_metric = config_params.get("validation_metric", "mean_squared_error") | 681 val_metric = config_params.get("validation_metric", "mean_squared_error") |
682 | |
648 else: | 683 else: |
649 num_unique_labels = ( | 684 num_unique_labels = ( |
650 label_series.nunique() if label_series is not None else 2 | 685 label_series.nunique() if label_series is not None else 2 |
651 ) | 686 ) |
652 output_type = "binary" if num_unique_labels == 2 else "category" | 687 output_type = "binary" if num_unique_labels == 2 else "category" |
653 output_feat = {"name": LABEL_COLUMN_NAME, "type": output_type} | 688 output_feat = {"name": LABEL_COLUMN_NAME, "type": output_type} |
654 if output_type == "binary" and config_params.get("threshold") is not None: | 689 if output_type == "binary" and config_params.get("threshold") is not None: |
655 output_feat["threshold"] = float(config_params["threshold"]) | 690 output_feat["threshold"] = float(config_params["threshold"]) |
656 val_metric = None | 691 val_metric = None |
692 | |
657 conf: Dict[str, Any] = { | 693 conf: Dict[str, Any] = { |
658 "model_type": "ecd", | 694 "model_type": "ecd", |
659 "input_features": [image_feat], | 695 "input_features": [image_feat], |
660 "output_features": [output_feat], | 696 "output_features": [output_feat], |
661 "combiner": {"type": "concat"}, | 697 "combiner": {"type": "concat"}, |
671 "split": split_config, | 707 "split": split_config, |
672 "num_processes": num_processes, | 708 "num_processes": num_processes, |
673 "in_memory": False, | 709 "in_memory": False, |
674 }, | 710 }, |
675 } | 711 } |
712 | |
676 logger.debug("LudwigDirectBackend: Config dict built.") | 713 logger.debug("LudwigDirectBackend: Config dict built.") |
677 try: | 714 try: |
678 yaml_str = yaml.dump(conf, sort_keys=False, indent=2) | 715 yaml_str = yaml.dump(conf, sort_keys=False, indent=2) |
679 logger.info("LudwigDirectBackend: YAML config generated.") | 716 logger.info("LudwigDirectBackend: YAML config generated.") |
680 return yaml_str | 717 return yaml_str |
692 output_dir: Path, | 729 output_dir: Path, |
693 random_seed: int = 42, | 730 random_seed: int = 42, |
694 ) -> None: | 731 ) -> None: |
695 """Invoke Ludwig's internal experiment_cli function to run the experiment.""" | 732 """Invoke Ludwig's internal experiment_cli function to run the experiment.""" |
696 logger.info("LudwigDirectBackend: Starting experiment execution.") | 733 logger.info("LudwigDirectBackend: Starting experiment execution.") |
734 | |
697 try: | 735 try: |
698 from ludwig.experiment import experiment_cli | 736 from ludwig.experiment import experiment_cli |
699 except ImportError as e: | 737 except ImportError as e: |
700 logger.error( | 738 logger.error( |
701 "LudwigDirectBackend: Could not import experiment_cli.", | 739 "LudwigDirectBackend: Could not import experiment_cli.", |
702 exc_info=True, | 740 exc_info=True, |
703 ) | 741 ) |
704 raise RuntimeError("Ludwig import failed.") from e | 742 raise RuntimeError("Ludwig import failed.") from e |
743 | |
705 output_dir.mkdir(parents=True, exist_ok=True) | 744 output_dir.mkdir(parents=True, exist_ok=True) |
745 | |
706 try: | 746 try: |
707 experiment_cli( | 747 experiment_cli( |
708 dataset=str(dataset_path), | 748 dataset=str(dataset_path), |
709 config=str(config_path), | 749 config=str(config_path), |
710 output_directory=str(output_dir), | 750 output_directory=str(output_dir), |
731 output_dir = Path(output_dir) | 771 output_dir = Path(output_dir) |
732 exp_dirs = sorted( | 772 exp_dirs = sorted( |
733 output_dir.glob("experiment_run*"), | 773 output_dir.glob("experiment_run*"), |
734 key=lambda p: p.stat().st_mtime, | 774 key=lambda p: p.stat().st_mtime, |
735 ) | 775 ) |
776 | |
736 if not exp_dirs: | 777 if not exp_dirs: |
737 logger.warning(f"No experiment run directories found in {output_dir}") | 778 logger.warning(f"No experiment run directories found in {output_dir}") |
738 return None | 779 return None |
780 | |
739 progress_file = exp_dirs[-1] / "model" / "training_progress.json" | 781 progress_file = exp_dirs[-1] / "model" / "training_progress.json" |
740 if not progress_file.exists(): | 782 if not progress_file.exists(): |
741 logger.warning(f"No training_progress.json found in {progress_file}") | 783 logger.warning(f"No training_progress.json found in {progress_file}") |
742 return None | 784 return None |
785 | |
743 try: | 786 try: |
744 with progress_file.open("r", encoding="utf-8") as f: | 787 with progress_file.open("r", encoding="utf-8") as f: |
745 data = json.load(f) | 788 data = json.load(f) |
746 return { | 789 return { |
747 "learning_rate": data.get("learning_rate"), | 790 "learning_rate": data.get("learning_rate"), |
773 logger.error(f"Error converting Parquet to CSV: {e}") | 816 logger.error(f"Error converting Parquet to CSV: {e}") |
774 | 817 |
775 def generate_plots(self, output_dir: Path) -> None: | 818 def generate_plots(self, output_dir: Path) -> None: |
776 """Generate all registered Ludwig visualizations for the latest experiment run.""" | 819 """Generate all registered Ludwig visualizations for the latest experiment run.""" |
777 logger.info("Generating all Ludwig visualizations…") | 820 logger.info("Generating all Ludwig visualizations…") |
821 | |
778 test_plots = { | 822 test_plots = { |
779 "compare_performance", | 823 "compare_performance", |
780 "compare_classifiers_performance_from_prob", | 824 "compare_classifiers_performance_from_prob", |
781 "compare_classifiers_performance_from_pred", | 825 "compare_classifiers_performance_from_pred", |
782 "compare_classifiers_performance_changing_k", | 826 "compare_classifiers_performance_changing_k", |
796 } | 840 } |
797 train_plots = { | 841 train_plots = { |
798 "learning_curves", | 842 "learning_curves", |
799 "compare_classifiers_performance_subset", | 843 "compare_classifiers_performance_subset", |
800 } | 844 } |
845 | |
801 output_dir = Path(output_dir) | 846 output_dir = Path(output_dir) |
802 exp_dirs = sorted( | 847 exp_dirs = sorted( |
803 output_dir.glob("experiment_run*"), | 848 output_dir.glob("experiment_run*"), |
804 key=lambda p: p.stat().st_mtime, | 849 key=lambda p: p.stat().st_mtime, |
805 ) | 850 ) |
806 if not exp_dirs: | 851 if not exp_dirs: |
807 logger.warning(f"No experiment run dirs found in {output_dir}") | 852 logger.warning(f"No experiment run dirs found in {output_dir}") |
808 return | 853 return |
809 exp_dir = exp_dirs[-1] | 854 exp_dir = exp_dirs[-1] |
855 | |
810 viz_dir = exp_dir / "visualizations" | 856 viz_dir = exp_dir / "visualizations" |
811 viz_dir.mkdir(exist_ok=True) | 857 viz_dir.mkdir(exist_ok=True) |
812 train_viz = viz_dir / "train" | 858 train_viz = viz_dir / "train" |
813 test_viz = viz_dir / "test" | 859 test_viz = viz_dir / "test" |
814 train_viz.mkdir(parents=True, exist_ok=True) | 860 train_viz.mkdir(parents=True, exist_ok=True) |
819 | 865 |
820 training_stats = _check(exp_dir / "training_statistics.json") | 866 training_stats = _check(exp_dir / "training_statistics.json") |
821 test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME) | 867 test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME) |
822 probs_path = _check(exp_dir / PREDICTIONS_PARQUET_FILE_NAME) | 868 probs_path = _check(exp_dir / PREDICTIONS_PARQUET_FILE_NAME) |
823 gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME) | 869 gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME) |
870 | |
824 dataset_path = None | 871 dataset_path = None |
825 split_file = None | 872 split_file = None |
826 desc = exp_dir / DESCRIPTION_FILE_NAME | 873 desc = exp_dir / DESCRIPTION_FILE_NAME |
827 if desc.exists(): | 874 if desc.exists(): |
828 with open(desc, "r") as f: | 875 with open(desc, "r") as f: |
829 cfg = json.load(f) | 876 cfg = json.load(f) |
830 dataset_path = _check(Path(cfg.get("dataset", ""))) | 877 dataset_path = _check(Path(cfg.get("dataset", ""))) |
831 split_file = _check(Path(get_split_path(cfg.get("dataset", "")))) | 878 split_file = _check(Path(get_split_path(cfg.get("dataset", "")))) |
879 | |
832 output_feature = "" | 880 output_feature = "" |
833 if desc.exists(): | 881 if desc.exists(): |
834 try: | 882 try: |
835 output_feature = cfg["config"]["output_features"][0]["name"] | 883 output_feature = cfg["config"]["output_features"][0]["name"] |
836 except Exception: | 884 except Exception: |
837 pass | 885 pass |
838 if not output_feature and test_stats: | 886 if not output_feature and test_stats: |
839 with open(test_stats, "r") as f: | 887 with open(test_stats, "r") as f: |
840 stats = json.load(f) | 888 stats = json.load(f) |
841 output_feature = next(iter(stats.keys()), "") | 889 output_feature = next(iter(stats.keys()), "") |
890 | |
842 viz_registry = get_visualizations_registry() | 891 viz_registry = get_visualizations_registry() |
843 for viz_name, viz_func in viz_registry.items(): | 892 for viz_name, viz_func in viz_registry.items(): |
844 if viz_name in train_plots: | 893 if viz_name in train_plots: |
845 viz_dir_plot = train_viz | 894 viz_dir_plot = train_viz |
846 elif viz_name in test_plots: | 895 elif viz_name in test_plots: |
847 viz_dir_plot = test_viz | 896 viz_dir_plot = test_viz |
848 else: | 897 else: |
849 continue | 898 continue |
899 | |
850 try: | 900 try: |
851 viz_func( | 901 viz_func( |
852 training_statistics=[training_stats] if training_stats else [], | 902 training_statistics=[training_stats] if training_stats else [], |
853 test_statistics=[test_stats] if test_stats else [], | 903 test_statistics=[test_stats] if test_stats else [], |
854 probabilities=[probs_path] if probs_path else [], | 904 probabilities=[probs_path] if probs_path else [], |
864 file_format="png", | 914 file_format="png", |
865 ) | 915 ) |
866 logger.info(f"✔ Generated {viz_name}") | 916 logger.info(f"✔ Generated {viz_name}") |
867 except Exception as e: | 917 except Exception as e: |
868 logger.warning(f"✘ Skipped {viz_name}: {e}") | 918 logger.warning(f"✘ Skipped {viz_name}: {e}") |
919 | |
869 logger.info(f"All visualizations written to {viz_dir}") | 920 logger.info(f"All visualizations written to {viz_dir}") |
870 | 921 |
871 def generate_html_report( | 922 def generate_html_report( |
872 self, | 923 self, |
873 title: str, | 924 title: str, |
879 cwd = Path.cwd() | 930 cwd = Path.cwd() |
880 report_name = title.lower().replace(" ", "_") + "_report.html" | 931 report_name = title.lower().replace(" ", "_") + "_report.html" |
881 report_path = cwd / report_name | 932 report_path = cwd / report_name |
882 output_dir = Path(output_dir) | 933 output_dir = Path(output_dir) |
883 output_type = None | 934 output_type = None |
935 | |
884 exp_dirs = sorted( | 936 exp_dirs = sorted( |
885 output_dir.glob("experiment_run*"), | 937 output_dir.glob("experiment_run*"), |
886 key=lambda p: p.stat().st_mtime, | 938 key=lambda p: p.stat().st_mtime, |
887 ) | 939 ) |
888 if not exp_dirs: | 940 if not exp_dirs: |
889 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}") | 941 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}") |
890 exp_dir = exp_dirs[-1] | 942 exp_dir = exp_dirs[-1] |
943 | |
891 base_viz_dir = exp_dir / "visualizations" | 944 base_viz_dir = exp_dir / "visualizations" |
892 train_viz_dir = base_viz_dir / "train" | 945 train_viz_dir = base_viz_dir / "train" |
893 test_viz_dir = base_viz_dir / "test" | 946 test_viz_dir = base_viz_dir / "test" |
947 | |
894 html = get_html_template() | 948 html = get_html_template() |
895 html += f"<h1>{title}</h1>" | 949 html += f"<h1>{title}</h1>" |
950 | |
896 metrics_html = "" | 951 metrics_html = "" |
897 train_val_metrics_html = "" | 952 train_val_metrics_html = "" |
898 test_metrics_html = "" | 953 test_metrics_html = "" |
899 try: | 954 try: |
900 train_stats_path = exp_dir / "training_statistics.json" | 955 train_stats_path = exp_dir / "training_statistics.json" |
916 ) | 971 ) |
917 except Exception as e: | 972 except Exception as e: |
918 logger.warning( | 973 logger.warning( |
919 f"Could not load stats for HTML report: {type(e).__name__}: {e}" | 974 f"Could not load stats for HTML report: {type(e).__name__}: {e}" |
920 ) | 975 ) |
976 | |
921 config_html = "" | 977 config_html = "" |
922 training_progress = self.get_training_process(output_dir) | 978 training_progress = self.get_training_process(output_dir) |
923 try: | 979 try: |
924 config_html = format_config_table_html( | 980 config_html = format_config_table_html( |
925 config, split_info, training_progress | 981 config, split_info, training_progress, output_type |
926 ) | 982 ) |
927 except Exception as e: | 983 except Exception as e: |
928 logger.warning(f"Could not load config for HTML report: {e}") | 984 logger.warning(f"Could not load config for HTML report: {e}") |
929 | 985 |
930 def render_img_section( | 986 def render_img_section( |
934 return f"<h2>{title}</h2><p><em>Directory not found.</em></p>" | 990 return f"<h2>{title}</h2><p><em>Directory not found.</em></p>" |
935 # collect every PNG | 991 # collect every PNG |
936 imgs = list(dir_path.glob("*.png")) | 992 imgs = list(dir_path.glob("*.png")) |
937 # --- EXCLUDE Ludwig's base confusion matrix and any top-N confusion_matrix files --- | 993 # --- EXCLUDE Ludwig's base confusion matrix and any top-N confusion_matrix files --- |
938 imgs = [ | 994 imgs = [ |
939 img for img in imgs | 995 img |
996 for img in imgs | |
940 if not ( | 997 if not ( |
941 img.name == "confusion_matrix.png" | 998 img.name == "confusion_matrix.png" |
942 or img.name.startswith("confusion_matrix__label_top") | 999 or img.name.startswith("confusion_matrix__label_top") |
943 or img.name == "roc_curves.png" | 1000 or img.name == "roc_curves.png" |
944 ) | 1001 ) |
970 ] | 1027 ] |
971 # filter and order | 1028 # filter and order |
972 valid_imgs = [img for img in imgs if img.name not in unwanted] | 1029 valid_imgs = [img for img in imgs if img.name not in unwanted] |
973 img_map = {img.name: img for img in valid_imgs} | 1030 img_map = {img.name: img for img in valid_imgs} |
974 ordered = [img_map[n] for n in display_order if n in img_map] | 1031 ordered = [img_map[n] for n in display_order if n in img_map] |
975 others = sorted(img for img in valid_imgs if img.name not in display_order) | 1032 others = sorted( |
1033 img for img in valid_imgs if img.name not in display_order | |
1034 ) | |
976 imgs = ordered + others | 1035 imgs = ordered + others |
977 else: | 1036 else: |
978 # regression: just sort whatever's left | 1037 # regression: just sort whatever's left |
979 imgs = sorted(imgs) | 1038 imgs = sorted(imgs) |
980 # render each remaining PNG | 1039 # render each remaining PNG |
1010 if pred_col is None: | 1069 if pred_col is None: |
1011 raise ValueError("No prediction column found in Parquet output") | 1070 raise ValueError("No prediction column found in Parquet output") |
1012 df_pred = df_preds[[pred_col]].rename(columns={pred_col: "prediction"}) | 1071 df_pred = df_preds[[pred_col]].rename(columns={pred_col: "prediction"}) |
1013 # 2) load ground truth for the test split from prepared CSV | 1072 # 2) load ground truth for the test split from prepared CSV |
1014 df_all = pd.read_csv(config["label_column_data_path"]) | 1073 df_all = pd.read_csv(config["label_column_data_path"]) |
1015 df_gt = df_all[df_all[SPLIT_COLUMN_NAME] == 2][LABEL_COLUMN_NAME].reset_index(drop=True) | 1074 df_gt = df_all[df_all[SPLIT_COLUMN_NAME] == 2][ |
1075 LABEL_COLUMN_NAME | |
1076 ].reset_index(drop=True) | |
1016 # 3) concatenate side-by-side | 1077 # 3) concatenate side-by-side |
1017 df_table = pd.concat([df_gt, df_pred], axis=1) | 1078 df_table = pd.concat([df_gt, df_pred], axis=1) |
1018 df_table.columns = [LABEL_COLUMN_NAME, "prediction"] | 1079 df_table.columns = [LABEL_COLUMN_NAME, "prediction"] |
1019 # 4) render as HTML | 1080 # 4) render as HTML |
1020 preds_html = df_table.to_html(index=False, classes="predictions-table") | 1081 preds_html = df_table.to_html(index=False, classes="predictions-table") |
1034 str(training_stats_path), | 1095 str(training_stats_path), |
1035 ) | 1096 ) |
1036 for plot in interactive_plots: | 1097 for plot in interactive_plots: |
1037 # 2) inject the static "roc_curves_from_prediction_statistics.png" | 1098 # 2) inject the static "roc_curves_from_prediction_statistics.png" |
1038 if plot["title"] == "ROC-AUC": | 1099 if plot["title"] == "ROC-AUC": |
1039 static_img = test_viz_dir / "roc_curves_from_prediction_statistics.png" | 1100 static_img = ( |
1101 test_viz_dir / "roc_curves_from_prediction_statistics.png" | |
1102 ) | |
1040 if static_img.exists(): | 1103 if static_img.exists(): |
1041 b64 = encode_image_to_base64(str(static_img)) | 1104 b64 = encode_image_to_base64(str(static_img)) |
1042 tab3_content += ( | 1105 tab3_content += ( |
1043 "<h2 style='text-align: center;'>" | 1106 "<h2 style='text-align: center;'>" |
1044 "Roc Curves From Prediction Statistics" | 1107 "Roc Curves From Prediction Statistics" |
1052 tab3_content += ( | 1115 tab3_content += ( |
1053 f"<h2 style='text-align: center;'>{plot['title']}</h2>" | 1116 f"<h2 style='text-align: center;'>{plot['title']}</h2>" |
1054 + plot["html"] | 1117 + plot["html"] |
1055 ) | 1118 ) |
1056 tab3_content += render_img_section( | 1119 tab3_content += render_img_section( |
1057 "Test Visualizations", | 1120 "Test Visualizations", test_viz_dir, output_type |
1058 test_viz_dir, | |
1059 output_type | |
1060 ) | 1121 ) |
1061 # assemble the tabs and help modal | 1122 # assemble the tabs and help modal |
1062 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content) | 1123 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content) |
1063 modal_html = get_metrics_help_modal() | 1124 modal_html = get_metrics_help_modal() |
1064 html += tabbed_html + modal_html + get_html_closing() | 1125 html += tabbed_html + modal_html + get_html_closing() |
1126 | |
1065 try: | 1127 try: |
1066 with open(report_path, "w") as f: | 1128 with open(report_path, "w") as f: |
1067 f.write(html) | 1129 f.write(html) |
1068 logger.info(f"HTML report generated at: {report_path}") | 1130 logger.info(f"HTML report generated at: {report_path}") |
1069 except Exception as e: | 1131 except Exception as e: |
1070 logger.error(f"Failed to write HTML report: {e}") | 1132 logger.error(f"Failed to write HTML report: {e}") |
1071 raise | 1133 raise |
1134 | |
1072 return report_path | 1135 return report_path |
1073 | 1136 |
1074 | 1137 |
1075 class WorkflowOrchestrator: | 1138 class WorkflowOrchestrator: |
1076 """Manages the image-classification workflow.""" | 1139 """Manages the image-classification workflow.""" |
1140 | |
1077 def __init__(self, args: argparse.Namespace, backend: Backend): | 1141 def __init__(self, args: argparse.Namespace, backend: Backend): |
1078 self.args = args | 1142 self.args = args |
1079 self.backend = backend | 1143 self.backend = backend |
1080 self.temp_dir: Optional[Path] = None | 1144 self.temp_dir: Optional[Path] = None |
1081 self.image_extract_dir: Optional[Path] = None | 1145 self.image_extract_dir: Optional[Path] = None |
1111 | 1175 |
1112 def _prepare_data(self) -> Tuple[Path, Dict[str, Any], str]: | 1176 def _prepare_data(self) -> Tuple[Path, Dict[str, Any], str]: |
1113 """Load CSV, update image paths, handle splits, and write prepared CSV.""" | 1177 """Load CSV, update image paths, handle splits, and write prepared CSV.""" |
1114 if not self.temp_dir or not self.image_extract_dir: | 1178 if not self.temp_dir or not self.image_extract_dir: |
1115 raise RuntimeError("Temp dirs not initialized before data prep.") | 1179 raise RuntimeError("Temp dirs not initialized before data prep.") |
1180 | |
1116 try: | 1181 try: |
1117 df = pd.read_csv(self.args.csv_file) | 1182 df = pd.read_csv(self.args.csv_file) |
1118 logger.info(f"Loaded CSV: {self.args.csv_file}") | 1183 logger.info(f"Loaded CSV: {self.args.csv_file}") |
1119 except Exception: | 1184 except Exception: |
1120 logger.error("Error loading CSV file", exc_info=True) | 1185 logger.error("Error loading CSV file", exc_info=True) |
1121 raise | 1186 raise |
1187 | |
1122 required = {IMAGE_PATH_COLUMN_NAME, LABEL_COLUMN_NAME} | 1188 required = {IMAGE_PATH_COLUMN_NAME, LABEL_COLUMN_NAME} |
1123 missing = required - set(df.columns) | 1189 missing = required - set(df.columns) |
1124 if missing: | 1190 if missing: |
1125 raise ValueError(f"Missing CSV columns: {', '.join(missing)}") | 1191 raise ValueError(f"Missing CSV columns: {', '.join(missing)}") |
1192 | |
1126 try: | 1193 try: |
1127 df[IMAGE_PATH_COLUMN_NAME] = df[IMAGE_PATH_COLUMN_NAME].apply( | 1194 df[IMAGE_PATH_COLUMN_NAME] = df[IMAGE_PATH_COLUMN_NAME].apply( |
1128 lambda p: str((self.image_extract_dir / p).resolve()) | 1195 lambda p: str((self.image_extract_dir / p).resolve()) |
1129 ) | 1196 ) |
1130 except Exception: | 1197 except Exception: |
1148 split_info = ( | 1215 split_info = ( |
1149 f"No split column in CSV. Created stratified random split: " | 1216 f"No split column in CSV. Created stratified random split: " |
1150 f"{[int(p * 100) for p in self.args.split_probabilities]}% " | 1217 f"{[int(p * 100) for p in self.args.split_probabilities]}% " |
1151 f"for train/val/test with balanced label distribution." | 1218 f"for train/val/test with balanced label distribution." |
1152 ) | 1219 ) |
1220 | |
1153 final_csv = self.temp_dir / TEMP_CSV_FILENAME | 1221 final_csv = self.temp_dir / TEMP_CSV_FILENAME |
1222 | |
1154 try: | 1223 try: |
1155 df.to_csv(final_csv, index=False) | 1224 df.to_csv(final_csv, index=False) |
1156 logger.info(f"Saved prepared data to {final_csv}") | 1225 logger.info(f"Saved prepared data to {final_csv}") |
1157 except Exception: | 1226 except Exception: |
1158 logger.error("Error saving prepared CSV", exc_info=True) | 1227 logger.error("Error saving prepared CSV", exc_info=True) |
1159 raise | 1228 raise |
1229 | |
1160 return final_csv, split_config, split_info | 1230 return final_csv, split_config, split_info |
1161 | 1231 |
1162 def _process_fixed_split( | 1232 def _process_fixed_split( |
1163 self, df: pd.DataFrame | 1233 self, df: pd.DataFrame |
1164 ) -> Tuple[pd.DataFrame, Dict[str, Any], str]: | 1234 ) -> Tuple[pd.DataFrame, Dict[str, Any], str]: |
1169 df[SPLIT_COLUMN_NAME] = pd.to_numeric(col, errors="coerce").astype( | 1239 df[SPLIT_COLUMN_NAME] = pd.to_numeric(col, errors="coerce").astype( |
1170 pd.Int64Dtype() | 1240 pd.Int64Dtype() |
1171 ) | 1241 ) |
1172 if df[SPLIT_COLUMN_NAME].isna().any(): | 1242 if df[SPLIT_COLUMN_NAME].isna().any(): |
1173 logger.warning("Split column contains non-numeric/missing values.") | 1243 logger.warning("Split column contains non-numeric/missing values.") |
1244 | |
1174 unique = set(df[SPLIT_COLUMN_NAME].dropna().unique()) | 1245 unique = set(df[SPLIT_COLUMN_NAME].dropna().unique()) |
1175 logger.info(f"Unique split values: {unique}") | 1246 logger.info(f"Unique split values: {unique}") |
1176 if unique == {0, 2}: | 1247 if unique == {0, 2}: |
1177 df = split_data_0_2( | 1248 df = split_data_0_2( |
1178 df, | 1249 df, |
1191 elif unique.issubset({0, 1, 2}): | 1262 elif unique.issubset({0, 1, 2}): |
1192 split_info = "Used user-defined split column from CSV." | 1263 split_info = "Used user-defined split column from CSV." |
1193 logger.info("Using fixed split as-is.") | 1264 logger.info("Using fixed split as-is.") |
1194 else: | 1265 else: |
1195 raise ValueError(f"Unexpected split values: {unique}") | 1266 raise ValueError(f"Unexpected split values: {unique}") |
1267 | |
1196 return df, {"type": "fixed", "column": SPLIT_COLUMN_NAME}, split_info | 1268 return df, {"type": "fixed", "column": SPLIT_COLUMN_NAME}, split_info |
1269 | |
1197 except Exception: | 1270 except Exception: |
1198 logger.error("Error processing fixed split", exc_info=True) | 1271 logger.error("Error processing fixed split", exc_info=True) |
1199 raise | 1272 raise |
1200 | 1273 |
1201 def _cleanup_temp_dirs(self) -> None: | 1274 def _cleanup_temp_dirs(self) -> None: |
1207 | 1280 |
1208 def run(self) -> None: | 1281 def run(self) -> None: |
1209 """Execute the full workflow end-to-end.""" | 1282 """Execute the full workflow end-to-end.""" |
1210 logger.info("Starting workflow...") | 1283 logger.info("Starting workflow...") |
1211 self.args.output_dir.mkdir(parents=True, exist_ok=True) | 1284 self.args.output_dir.mkdir(parents=True, exist_ok=True) |
1285 | |
1212 try: | 1286 try: |
1213 self._create_temp_dirs() | 1287 self._create_temp_dirs() |
1214 self._extract_images() | 1288 self._extract_images() |
1215 csv_path, split_cfg, split_info = self._prepare_data() | 1289 csv_path, split_cfg, split_info = self._prepare_data() |
1290 | |
1216 use_pretrained = self.args.use_pretrained or self.args.fine_tune | 1291 use_pretrained = self.args.use_pretrained or self.args.fine_tune |
1292 | |
1217 backend_args = { | 1293 backend_args = { |
1218 "model_name": self.args.model_name, | 1294 "model_name": self.args.model_name, |
1219 "fine_tune": self.args.fine_tune, | 1295 "fine_tune": self.args.fine_tune, |
1220 "use_pretrained": use_pretrained, | 1296 "use_pretrained": use_pretrained, |
1221 "epochs": self.args.epochs, | 1297 "epochs": self.args.epochs, |
1228 "label_column_data_path": csv_path, | 1304 "label_column_data_path": csv_path, |
1229 "augmentation": self.args.augmentation, | 1305 "augmentation": self.args.augmentation, |
1230 "threshold": self.args.threshold, | 1306 "threshold": self.args.threshold, |
1231 } | 1307 } |
1232 yaml_str = self.backend.prepare_config(backend_args, split_cfg) | 1308 yaml_str = self.backend.prepare_config(backend_args, split_cfg) |
1309 | |
1233 config_file = self.temp_dir / TEMP_CONFIG_FILENAME | 1310 config_file = self.temp_dir / TEMP_CONFIG_FILENAME |
1234 config_file.write_text(yaml_str) | 1311 config_file.write_text(yaml_str) |
1235 logger.info(f"Wrote backend config: {config_file}") | 1312 logger.info(f"Wrote backend config: {config_file}") |
1313 | |
1236 self.backend.run_experiment( | 1314 self.backend.run_experiment( |
1237 csv_path, | 1315 csv_path, |
1238 config_file, | 1316 config_file, |
1239 self.args.output_dir, | 1317 self.args.output_dir, |
1240 self.args.random_seed, | 1318 self.args.random_seed, |
1372 nargs=3, | 1450 nargs=3, |
1373 metavar=("train", "val", "test"), | 1451 metavar=("train", "val", "test"), |
1374 action=SplitProbAction, | 1452 action=SplitProbAction, |
1375 default=[0.7, 0.1, 0.2], | 1453 default=[0.7, 0.1, 0.2], |
1376 help=( | 1454 help=( |
1377 "Random split proportions (e.g., 0.7 0.1 0.2)." | 1455 "Random split proportions (e.g., 0.7 0.1 0.2).Only used if no split column." |
1378 "Only used if no split column." | |
1379 ), | 1456 ), |
1380 ) | 1457 ) |
1381 parser.add_argument( | 1458 parser.add_argument( |
1382 "--random-seed", | 1459 "--random-seed", |
1383 type=int, | 1460 type=int, |
1406 type=float, | 1483 type=float, |
1407 default=None, | 1484 default=None, |
1408 help=( | 1485 help=( |
1409 "Decision threshold for binary classification (0.0–1.0)." | 1486 "Decision threshold for binary classification (0.0–1.0)." |
1410 "Overrides default 0.5." | 1487 "Overrides default 0.5." |
1411 ) | 1488 ), |
1412 ) | 1489 ) |
1413 args = parser.parse_args() | 1490 args = parser.parse_args() |
1491 | |
1414 if not 0.0 <= args.validation_size <= 1.0: | 1492 if not 0.0 <= args.validation_size <= 1.0: |
1415 parser.error("validation-size must be between 0.0 and 1.0") | 1493 parser.error("validation-size must be between 0.0 and 1.0") |
1416 if not args.csv_file.is_file(): | 1494 if not args.csv_file.is_file(): |
1417 parser.error(f"CSV not found: {args.csv_file}") | 1495 parser.error(f"CSV not found: {args.csv_file}") |
1418 if not args.image_zip.is_file(): | 1496 if not args.image_zip.is_file(): |
1421 try: | 1499 try: |
1422 augmentation_setup = aug_parse(args.augmentation) | 1500 augmentation_setup = aug_parse(args.augmentation) |
1423 setattr(args, "augmentation", augmentation_setup) | 1501 setattr(args, "augmentation", augmentation_setup) |
1424 except ValueError as e: | 1502 except ValueError as e: |
1425 parser.error(str(e)) | 1503 parser.error(str(e)) |
1504 | |
1426 backend_instance = LudwigDirectBackend() | 1505 backend_instance = LudwigDirectBackend() |
1427 orchestrator = WorkflowOrchestrator(args, backend_instance) | 1506 orchestrator = WorkflowOrchestrator(args, backend_instance) |
1507 | |
1428 exit_code = 0 | 1508 exit_code = 0 |
1429 try: | 1509 try: |
1430 orchestrator.run() | 1510 orchestrator.run() |
1431 logger.info("Main script finished successfully.") | 1511 logger.info("Main script finished successfully.") |
1432 except Exception as e: | 1512 except Exception as e: |
1437 | 1517 |
1438 | 1518 |
1439 if __name__ == "__main__": | 1519 if __name__ == "__main__": |
1440 try: | 1520 try: |
1441 import ludwig | 1521 import ludwig |
1522 | |
1442 logger.debug(f"Found Ludwig version: {ludwig.globals.LUDWIG_VERSION}") | 1523 logger.debug(f"Found Ludwig version: {ludwig.globals.LUDWIG_VERSION}") |
1443 except ImportError: | 1524 except ImportError: |
1444 logger.error( | 1525 logger.error( |
1445 "Ludwig library not found. Please ensure Ludwig is installed " | 1526 "Ludwig library not found. Please ensure Ludwig is installed " |
1446 "('pip install ludwig[image]')" | 1527 "('pip install ludwig[image]')" |
1447 ) | 1528 ) |
1448 sys.exit(1) | 1529 sys.exit(1) |
1530 | |
1449 main() | 1531 main() |
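
The clearest functional change in this comparison, as opposed to formatting, is that the report generator now passes output_type into format_config_table_html, matching the guard near the top of that function: the threshold row is emitted only for binary classification outputs. A minimal sketch of that guard, with a hypothetical helper name, assuming only what the hunks above show:

```python
# Sketch of the threshold-row guard in format_config_table_html. The helper
# name is illustrative; only the condition itself is taken from the diff.
from typing import Optional

def include_config_row(key: str, output_type: Optional[str]) -> bool:
    # "threshold" only applies to binary classification outputs.
    if key == "threshold":
        return output_type == "binary"
    return True

# include_config_row("threshold", "category") -> False
# include_config_row("learning_rate", None) -> True
```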
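
For the stratified-split hunks, the two-stage arithmetic is easiest to see in isolation: the test set is carved out first, then the validation fraction is rescaled to the remaining data, val_size_adjusted = p_val / (p_train + p_val). A minimal sketch, assuming scikit-learn and illustrative "label"/"split" column names rather than the tool's constants:

```python
# Two-stage stratified split, as used when the CSV has no split column.
import pandas as pd
from sklearn.model_selection import train_test_split

def stratified_random_split(df: pd.DataFrame, label_column: str = "label",
                            probs=(0.7, 0.1, 0.2), random_state: int = 42) -> pd.DataFrame:
    out = df.copy()
    out["split"] = 0
    # Stage 1: carve out the test set, stratified on the label.
    train_val_idx, test_idx = train_test_split(
        out.index.to_numpy(),
        test_size=probs[2],
        random_state=random_state,
        stratify=out[label_column],
    )
    # Stage 2: split the remainder into train/validation; the validation
    # fraction is rescaled to the remaining data: p_val / (p_train + p_val).
    val_size_adjusted = probs[1] / (probs[0] + probs[1])
    train_idx, val_idx = train_test_split(
        train_val_idx,
        test_size=val_size_adjusted,
        random_state=random_state,
        stratify=out.loc[train_val_idx, label_column],
    )
    out.loc[train_idx, "split"] = 0
    out.loc[val_idx, "split"] = 1
    out.loc[test_idx, "split"] = 2
    return out.astype({"split": int})
```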
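
The prepare_config hunks infer the task type from the label column: a numeric label with more than 10 distinct values is treated as regression, anything else as classification, with binary versus category decided by the number of classes. A self-contained sketch of that decision, using an illustrative "label" column name and helper rather than the tool's own constants:

```python
# Task-type inference and output-feature block, as shown in prepare_config.
from typing import Any, Dict, Optional
import pandas as pd
import pandas.api.types as ptypes

def infer_output_feature(label_series: Optional[pd.Series],
                         threshold: Optional[float] = None) -> Dict[str, Any]:
    # Regression: numeric label with many distinct values.
    if (label_series is not None
            and ptypes.is_numeric_dtype(label_series.dtype)
            and label_series.nunique() > 10):
        return {"name": "label", "type": "number", "decoder": {"type": "regressor"}}
    # Classification: binary for exactly two classes, category otherwise.
    num_unique = label_series.nunique() if label_series is not None else 2
    output_type = "binary" if num_unique == 2 else "category"
    feat: Dict[str, Any] = {"name": "label", "type": output_type}
    if output_type == "binary" and threshold is not None:
        feat["threshold"] = float(threshold)
    return feat
```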