Mercurial > repos > goeckslab > multimodal_learner
comparison multimodal_learner.py @ 5:975512caae22 draft default tip
planemo upload for repository https://github.com/goeckslab/gleam.git commit e984bd965d46c5f9ee5c3beb7429f3fd4a91ee35
| author | goeckslab |
|---|---|
| date | Fri, 23 Jan 2026 23:06:00 +0000 |
| parents | de753cf07008 |
| children |
comparison
equal
deleted
inserted
replaced
| 4:de753cf07008 | 5:975512caae22 |
|---|---|
| 241 logger.info(f"Train dataset loaded: {len(train_dataset)} rows") | 241 logger.info(f"Train dataset loaded: {len(train_dataset)} rows") |
| 242 if test_dataset is not None: | 242 if test_dataset is not None: |
| 243 logger.info(f"Test dataset loaded: {len(test_dataset)} rows") | 243 logger.info(f"Test dataset loaded: {len(test_dataset)} rows") |
| 244 | 244 |
| 245 # ------------------------------------------------------------------ | 245 # ------------------------------------------------------------------ |
| 246 # Resolve target column by name; if Galaxy passed a numeric index, | 246 # Resolve columns by name; if Galaxy passed a numeric index, |
| 247 # translate it to the corresponding header so downstream checks pass. | 247 # translate it to the corresponding header so downstream checks pass. |
| 248 # Galaxy's data_column widget is 1-based. | 248 # Galaxy's data_column widget is 1-based. |
| 249 # ------------------------------------------------------------------ | 249 # ------------------------------------------------------------------ |
| 250 if args.target_column not in train_dataset.columns and str(args.target_column).isdigit(): | 250 def resolve_column_name(value, columns, label): |
| 251 idx = int(args.target_column) - 1 | 251 if value is None: |
| 252 if 0 <= idx < len(train_dataset.columns): | 252 return None |
| 253 resolved = train_dataset.columns[idx] | 253 if str(value).isdigit(): |
| 254 logger.info(f"Target column '{args.target_column}' not found; using column #{idx + 1} header '{resolved}' instead.") | 254 idx = int(value) - 1 |
| 255 args.target_column = resolved | 255 if 0 <= idx < len(columns): |
| 256 else: | 256 resolved = columns[idx] |
| 257 logger.error(f"Numeric target index '{args.target_column}' is out of range for dataset with {len(train_dataset.columns)} columns.") | 257 if value in columns: |
| | 258 logger.warning( |
| | 259 "%s column value '%s' matches a header, but Galaxy data_column " |
| | 260 "inputs are interpreted as 1-based indices; using column #%s header '%s'.", |
| | 261 label, |
| | 262 value, |
| | 263 idx + 1, |
| | 264 resolved, |
| | 265 ) |
| | 266 logger.info( |
| | 267 "%s column '%s' not found; using column #%s header '%s' instead.", |
| | 268 label, |
| | 269 value, |
| | 270 idx + 1, |
| | 271 resolved, |
| | 272 ) |
| | 273 return resolved |
| | 274 logger.error( |
| | 275 "Numeric %s index '%s' is out of range for dataset with %s columns.", |
| | 276 label.lower(), |
| | 277 value, |
| | 278 len(columns), |
| | 279 ) |
| 258 sys.exit(1) | 280 sys.exit(1) |
| | 281 return value |
| | 282 |
| | 283 args.target_column = resolve_column_name(args.target_column, train_dataset.columns, "Target") |
| | 284 args.sample_id_column = resolve_column_name( |
| | 285 args.sample_id_column, train_dataset.columns, "Sample ID" |
| | 286 ) |
| 259 | 287 |
| 260 # ------------------------------------------------------------------ | 288 # ------------------------------------------------------------------ |
| 261 # Image handling (ZIP extraction + absolute path expansion) | 289 # Image handling (ZIP extraction + absolute path expansion) |
| 262 # ------------------------------------------------------------------ | 290 # ------------------------------------------------------------------ |
| 263 extracted_imgs_path = prepare_image_search_dirs(args) | 291 extracted_imgs_path = prepare_image_search_dirs(args) |
