comparison image_workflow.py @ 20:64872c48a21f draft

planemo upload for repository https://github.com/goeckslab/gleam.git commit d4b122527a2402e43512f9b4bda00c7bff0ec9e9
author goeckslab
date Tue, 06 Jan 2026 15:35:11 +0000
parents db9be962dc13
children d5c582cf74bc
comparison
equal deleted inserted replaced
19:c460abae83eb 20:64872c48a21f
166 df=df, 166 df=df,
167 split_column=SPLIT_COLUMN_NAME, 167 split_column=SPLIT_COLUMN_NAME,
168 split_probabilities=self.args.split_probabilities, 168 split_probabilities=self.args.split_probabilities,
169 random_state=self.args.random_seed, 169 random_state=self.args.random_seed,
170 label_column=LABEL_COLUMN_NAME, 170 label_column=LABEL_COLUMN_NAME,
171 group_column=self.args.sample_id_column,
171 ) 172 )
172 split_config = { 173 split_config = {
173 "type": "fixed", 174 "type": "fixed",
174 "column": SPLIT_COLUMN_NAME, 175 "column": SPLIT_COLUMN_NAME,
175 } 176 }
176 split_info = ( 177 split_info = (
177 f"No split column in CSV. Created stratified random split: " 178 f"No split column in CSV. Created stratified random split: "
178 f"{[int(p * 100) for p in self.args.split_probabilities]}% " 179 f"{[int(p * 100) for p in self.args.split_probabilities]}% "
179 f"for train/val/test with balanced label distribution." 180 f"for train/val/test with balanced label distribution."
180 ) 181 )
182 if self.args.sample_id_column:
183 split_info += (
184 f" Grouped by sample ID column '{self.args.sample_id_column}' "
185 "to prevent data leakage."
186 )
181 187
182 final_csv = self.temp_dir / TEMP_CSV_FILENAME 188 final_csv = self.temp_dir / TEMP_CSV_FILENAME
183 189
184 try: 190 try:
185 df.to_csv(final_csv, index=False) 191 df.to_csv(final_csv, index=False)