comparison pycaret_train.py @ 15:01e7c5481f13 draft

planemo upload for repository https://github.com/goeckslab/gleam commit f632803cda732005bdcf3ac3e8fe7a807a82c1d9
author goeckslab
date Mon, 19 Jan 2026 05:54:52 +0000
parents 49f73a3c12f3
children
comparison
equal deleted inserted replaced
14:edd515746388 15:01e7c5481f13
131 parser.add_argument( 131 parser.add_argument(
132 "--best_model_metric", 132 "--best_model_metric",
133 type=str, 133 type=str,
134 default=None, 134 default=None,
135 help="Metric used to select the best model (e.g. AUC, Accuracy, R2, RMSE).", 135 help="Metric used to select the best model (e.g. AUC, Accuracy, R2, RMSE).",
136 )
137 parser.add_argument(
138 "--sample-id-column",
139 type=str,
140 default=None,
141 help=(
142 "Optional column name used to group samples during splitting "
143 "to prevent data leakage (e.g., patient_id or slide_id)."
144 ),
136 ) 145 )
137 146
138 args = parser.parse_args() 147 args = parser.parse_args()
139 148
140 # Derive n_jobs from CLI or GALAXY_SLOTS env var 149 # Derive n_jobs from CLI or GALAXY_SLOTS env var
168 "fix_imbalance": args.fix_imbalance, 177 "fix_imbalance": args.fix_imbalance,
169 "tune_model": args.tune_model, 178 "tune_model": args.tune_model,
170 "n_jobs": n_jobs, 179 "n_jobs": n_jobs,
171 "probability_threshold": args.probability_threshold, 180 "probability_threshold": args.probability_threshold,
172 "best_model_metric": args.best_model_metric, 181 "best_model_metric": args.best_model_metric,
182 "sample_id_column": args.sample_id_column,
173 } 183 }
174 LOG.info(f"Model kwargs: {model_kwargs}") 184 LOG.info(f"Model kwargs: {model_kwargs}")
175 185
176 # If the XML passed a comma-separated string in a single list element, split it out 186 # If the XML passed a comma-separated string in a single list element, split it out
177 if args.models: 187 if args.models: