Mercurial > repos > goeckslab > image_learner
diff image_workflow.py @ 20:64872c48a21f draft
planemo upload for repository https://github.com/goeckslab/gleam.git commit d4b122527a2402e43512f9b4bda00c7bff0ec9e9
| author | goeckslab |
|---|---|
| date | Tue, 06 Jan 2026 15:35:11 +0000 |
| parents | db9be962dc13 |
| children | d5c582cf74bc |
line wrap: on
line diff
--- a/image_workflow.py Thu Dec 18 16:59:58 2025 +0000
+++ b/image_workflow.py Tue Jan 06 15:35:11 2026 +0000
@@ -168,6 +168,7 @@
                 split_probabilities=self.args.split_probabilities,
                 random_state=self.args.random_seed,
                 label_column=LABEL_COLUMN_NAME,
+                group_column=self.args.sample_id_column,
             )
             split_config = {
                 "type": "fixed",
@@ -178,6 +179,11 @@
                 f"{[int(p * 100) for p in self.args.split_probabilities]}% "
                 f"for train/val/test with balanced label distribution."
             )
+            if self.args.sample_id_column:
+                split_info += (
+                    f" Grouped by sample ID column '{self.args.sample_id_column}' "
+                    "to prevent data leakage."
+                )
             final_csv = self.temp_dir / TEMP_CSV_FILENAME
