comparison multimodal_learner.py @ 5:975512caae22 draft default tip

planemo upload for repository https://github.com/goeckslab/gleam.git commit e984bd965d46c5f9ee5c3beb7429f3fd4a91ee35
author goeckslab
date Fri, 23 Jan 2026 23:06:00 +0000
parents de753cf07008
children
comparison
equal deleted inserted replaced
4:de753cf07008 5:975512caae22
241 logger.info(f"Train dataset loaded: {len(train_dataset)} rows") 241 logger.info(f"Train dataset loaded: {len(train_dataset)} rows")
242 if test_dataset is not None: 242 if test_dataset is not None:
243 logger.info(f"Test dataset loaded: {len(test_dataset)} rows") 243 logger.info(f"Test dataset loaded: {len(test_dataset)} rows")
244 244
245 # ------------------------------------------------------------------ 245 # ------------------------------------------------------------------
246 # Resolve target column by name; if Galaxy passed a numeric index, 246 # Resolve columns by name; if Galaxy passed a numeric index,
247 # translate it to the corresponding header so downstream checks pass. 247 # translate it to the corresponding header so downstream checks pass.
248 # Galaxy's data_column widget is 1-based. 248 # Galaxy's data_column widget is 1-based.
249 # ------------------------------------------------------------------ 249 # ------------------------------------------------------------------
def resolve_column_name(value, columns, label):
    """Translate a Galaxy column selection into a column header.

    A purely numeric ``value`` is always interpreted as a 1-based position
    in ``columns`` (Galaxy's data_column widget is 1-based), even when a
    header with the same text exists; a warning is logged in that case.

    Args:
        value: The column selection: ``None``, a header name, or a 1-based
            index given as an int or a numeric string.
        columns: Ordered column headers; must support ``len``, indexing
            and ``in``.
        label: Human-readable option name used in log messages
            (e.g. "Target").

    Returns:
        ``None`` if ``value`` is ``None``; the header at the requested
        position if ``value`` is an in-range numeric index; otherwise
        ``value`` unchanged.

    Raises:
        SystemExit: With status 1 (via ``sys.exit``) when a numeric index
            falls outside ``columns``.
    """
    if value is None:
        return None

    text = str(value)
    # isdecimal() rather than isdigit(): isdigit() is also true for
    # characters such as superscript digits, which int() rejects with
    # ValueError. Such values are treated as plain header names instead.
    if not text.isdecimal():
        return value

    idx = int(text) - 1
    if not 0 <= idx < len(columns):
        logger.error(
            "Numeric %s index '%s' is out of range for dataset with %s columns.",
            label.lower(),
            value,
            len(columns),
        )
        sys.exit(1)

    resolved = columns[idx]
    if value in columns:
        # The number is also a literal header; the index interpretation
        # wins, so tell the user which column was actually picked.
        logger.warning(
            "%s column value '%s' matches a header, but Galaxy data_column "
            "inputs are interpreted as 1-based indices; using column #%s header '%s'.",
            label,
            value,
            idx + 1,
            resolved,
        )
    else:
        # Only claim "not found" when the value really is not a header.
        logger.info(
            "%s column '%s' not found; using column #%s header '%s' instead.",
            label,
            value,
            idx + 1,
            resolved,
        )
    return resolved
282
283 args.target_column = resolve_column_name(args.target_column, train_dataset.columns, "Target")
284 args.sample_id_column = resolve_column_name(
285 args.sample_id_column, train_dataset.columns, "Sample ID"
286 )
259 287
260 # ------------------------------------------------------------------ 288 # ------------------------------------------------------------------
261 # Image handling (ZIP extraction + absolute path expansion) 289 # Image handling (ZIP extraction + absolute path expansion)
262 # ------------------------------------------------------------------ 290 # ------------------------------------------------------------------
263 extracted_imgs_path = prepare_image_search_dirs(args) 291 extracted_imgs_path = prepare_image_search_dirs(args)