Mercurial > repos > goeckslab > ludwig_config_generator
diff ludwig_autogenconfig.py @ 0:183adfc24076 draft default tip
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
author | goeckslab |
---|---|
date | Tue, 07 Jan 2025 22:46:36 +0000 |
parents | |
children |
line wrap: on
line diff
import argparse
import logging

from ludwig import automl
from ludwig.utils import defaults

from pandas import read_csv

logging.basicConfig(level=logging.DEBUG)
LOG = logging.getLogger(__name__)


def _read_column_names(dataset):
    """Return the header column names of the tabular file at *dataset*."""
    # sep=None together with the python engine asks pandas to detect the
    # delimiter itself; only the header is needed, so just two data rows
    # are read.
    df = read_csv(dataset, nrows=2, sep=None, engine='python')
    return df.columns.tolist()


def main(argv=None):
    """Generate a Ludwig config for a dataset and optionally render it.

    The output feature is selected by its 1-based column position in the
    dataset header. The resolved column name is handed to Ludwig's AutoML
    ``init_config`` entry point, which writes the config to ``--output``.
    With ``--renderconfig`` the written config is then passed to Ludwig's
    ``render_config`` entry point, overwriting the same file.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            (the default) ``sys.argv[1:]`` is parsed.
    """
    parser = argparse.ArgumentParser(
        description='Render a Ludwig config')
    parser.add_argument(
        '--dataset',
        type=str,
        help='Path to the dataset file',
        required=True)
    parser.add_argument(
        '--output_feature',
        type=int,
        help='1-based column position of the output feature in the dataset',
        required=True)
    parser.add_argument(
        '--output',
        type=str,
        help='Path for the output file',
        required=True)
    parser.add_argument(
        '--renderconfig',
        action='store_true',
        help='Render the config')
    args = parser.parse_args(argv)

    # Translate the column position into the output feature name.
    names = _read_column_names(args.dataset)
    # Reject out-of-range positions explicitly: without this check 0 and
    # negative values would wrap around via negative indexing and silently
    # pick the wrong column.
    if not 1 <= args.output_feature <= len(names):
        parser.error(
            f"--output_feature must be between 1 and {len(names)} "
            f"(got {args.output_feature})")
    target = names[args.output_feature - 1]

    args_init = ["--dataset", args.dataset,
                 "--target", target,
                 "--output", args.output]
    automl.cli_init_config(args_init)

    if args.renderconfig:
        # Render in place: the generated config is both input and output.
        args_render = ["--config", args.output, "--output", args.output]
        defaults.cli_render_config(args_render)


if __name__ == "__main__":
    main()