Mercurial > repos > goeckslab > ludwig_experiment
changeset 5:78b1e3921576 draft
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit 4dc221b2fa9717552787f0985ad3fc3df4460158
| author | goeckslab | 
|---|---|
| date | Sat, 21 Jun 2025 15:05:21 +0000 | 
| parents | 5a39f429fad1 | 
| children | ec99e53e91d6 | 
| files | Docker/galaxy_ludwig/Dockerfile Docker/galaxy_ludwig_ray_gpu/Dockerfile LICENSE README.md ludwig_autogenconfig.py ludwig_evaluate.py ludwig_experiment.py ludwig_hyperopt.py ludwig_macros.xml ludwig_predict.py ludwig_render_config.py ludwig_train.py ludwig_visualize.py | 
| diffstat | 13 files changed, 110 insertions(+), 19 deletions(-) [+] | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Docker/galaxy_ludwig/Dockerfile Sat Jun 21 15:05:21 2025 +0000 @@ -0,0 +1,16 @@ +FROM python:3.10-slim + +ARG VERSION=0.10.3 + +RUN apt-get -y update && apt-get install -y --no-install-recommends build-essential cmake git unzip + +RUN pip install -U pip && \ + pip install 'git+https://github.com/goeckslab/model-unpickler.git' && \ + pip install 'git+https://github.com/goeckslab/smart-report.git@17df590f3ceb065add099f37b4874c85bd275014' && \ + pip install 'ludwig[full]'==$VERSION && \ + pip uninstall -y matplotlib && \ + pip install matplotlib==3.8.3 && \ + pip cache purge + +RUN apt-get purge -y build-essential cmake && apt-get -y autoremove && apt-get clean +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Docker/galaxy_ludwig_ray_gpu/Dockerfile Sat Jun 21 15:05:21 2025 +0000 @@ -0,0 +1,19 @@ +FROM ludwigai/ludwig-ray-gpu + +USER root + +# there is an error with the kubernetes-xenial repo +RUN sed -i '/^deb .*kubernetes-xenial/ s/^/#/' /etc/apt/sources.list.d/kubernetes.list || true + +RUN apt-get -y update && apt-get install -y unzip && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install -U pip + +RUN pip install --no-cache-dir 'git+https://github.com/goeckslab/model-unpickler.git' && \ + pip install --no-cache-dir 'git+https://github.com/goeckslab/smart-report.git@17df590f3ceb065add099f37b4874c85bd275014' + +RUN useradd -m -s /bin/bash nonrootuser + +USER nonrootuser \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE Sat Jun 21 15:05:21 2025 +0000 @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 goeckslab + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Sat Jun 21 15:05:21 2025 +0000 @@ -0,0 +1,48 @@ +# Galaxy-Ludwig +A library of Galaxy deep learning tools based on Ludwig. + +# Install Galaxy-Ludwig into Galaxy +We assume that you have Galaxy running and Docker installed on your server/laptop. +* Create a new folder named `ludwig` (or any name you prefer) under Galaxy’s tools folder. +* Select and download the branch you want to install and use. Copy all XML and Python (`.py`) files under the tools folder in this repo to the ludwig folder (the folder you created in the previous step). +* Update `tool_conf.xml` to include Galaxy-Ludwig. See [documentation](https://docs.galaxyproject.org/en/master/admin/tool_panel.html) for more details. This is an example: +``` +<section id="ludwig" name="Ludwig Applications"> + <tool file="ludwig/ludwig_evaluate.xml" /> + <tool file="ludwig/ludwig_experiment.xml" /> + <tool file="ludwig/ludwig_hyperopt.xml" /> + <tool file="ludwig/ludwig_predict.xml" /> + <tool file="ludwig/ludwig_render_config.xml" /> + <tool file="ludwig/ludwig_train.xml" /> + <tool file="ludwig/ludwig_visualize.xml" /> +</section> +``` + + +* This is an example of a `job_conf.yml` file that you can create to enable Docker for a local Galaxy instance where you want Ludwig-related jobs to run: + +``` +runners: + local: + load: galaxy.jobs.runners.local:LocalJobRunner + workers: 4 +execution: + default: local + environments: + local: + runner: local + docker_enabled: true +``` +If you are using an older version of Galaxy, configure `job_conf.xml` instead of `job_conf.yml`. In that case, you would configure a destination instead of execution and environment. +See [documentation](https://docs.galaxyproject.org/en/master/admin/jobs.html#running-jobs-in-containers) for job_conf configuration. +* If you haven’t set `sanitize_all_html: false` in `galaxy.yml`, please set it to `false` to enable our HTML report functionality. 
+ +# Get Galaxy-Ludwig Docker image + +This step is optional. +If you want to speed up your runs, execute the following command: +``` +docker pull quay.io/goeckslab/galaxy-ludwig-gpu:0.10.1 +``` + +* You should now be good to go.
--- a/ludwig_autogenconfig.py Tue Mar 18 22:09:30 2025 +0000 +++ b/ludwig_autogenconfig.py Sat Jun 21 15:05:21 2025 +0000 @@ -3,7 +3,6 @@ from ludwig import automl from ludwig.utils import defaults - from pandas import read_csv logging.basicConfig(level=logging.DEBUG) @@ -39,7 +38,7 @@ # get the output feature name df = read_csv(args.dataset, nrows=2, sep=None, engine='python') names = df.columns.tolist() - target = names[args.output_feature-1] + target = names[args.output_feature - 1] args_init = ["--dataset", args.dataset, "--target", target,
--- a/ludwig_evaluate.py Tue Mar 18 22:09:30 2025 +0000 +++ b/ludwig_evaluate.py Sat Jun 21 15:05:21 2025 +0000 @@ -3,10 +3,8 @@ import sys from ludwig.evaluate import cli - from ludwig_experiment import convert_parquet_to_csv, \ generate_html_report, make_visualizations - from model_unpickler import SafeUnpickler
--- a/ludwig_experiment.py Tue Mar 18 22:09:30 2025 +0000 +++ b/ludwig_experiment.py Sat Jun 21 15:05:21 2025 +0000 @@ -4,6 +4,7 @@ import pickle import sys +import pandas as pd from ludwig.experiment import cli from ludwig.globals import ( DESCRIPTION_FILE_NAME, @@ -13,11 +14,7 @@ ) from ludwig.utils.data_utils import get_split_path from ludwig.visualize import get_visualizations_registry - from model_unpickler import SafeUnpickler - -import pandas as pd - from utils import ( encode_image_to_base64, get_html_closing, @@ -35,7 +32,7 @@ output_directory = None for ix, arg in enumerate(sys.argv): if arg == "--output_directory": - output_directory = sys.argv[ix+1] + output_directory = sys.argv[ix + 1] break viz_output_directory = os.path.join(output_directory, "visualizations")
--- a/ludwig_hyperopt.py Tue Mar 18 22:09:30 2025 +0000 +++ b/ludwig_hyperopt.py Sat Jun 21 15:05:21 2025 +0000 @@ -8,9 +8,7 @@ ) from ludwig.hyperopt_cli import cli from ludwig.visualize import get_visualizations_registry - from model_unpickler import SafeUnpickler - from utils import ( encode_image_to_base64, get_html_closing, @@ -84,7 +82,7 @@ output_directory = None for ix, arg in enumerate(sys.argv): if arg == "--output_directory": - output_directory = sys.argv[ix+1] + output_directory = sys.argv[ix + 1] break hyperopt_stats_path = os.path.join(
--- a/ludwig_macros.xml Tue Mar 18 22:09:30 2025 +0000 +++ b/ludwig_macros.xml Sat Jun 21 15:05:21 2025 +0000 @@ -1,7 +1,7 @@ <macros> <token name="@LUDWIG_VERSION@">0.10.1</token> - <token name="@SUFFIX@">0</token> + <token name="@SUFFIX@">1</token> <token name="@VERSION@">@LUDWIG_VERSION@+@SUFFIX@</token>
--- a/ludwig_predict.py Tue Mar 18 22:09:30 2025 +0000 +++ b/ludwig_predict.py Sat Jun 21 15:05:21 2025 +0000 @@ -3,9 +3,7 @@ import sys from ludwig.predict import cli - from ludwig_experiment import convert_parquet_to_csv - from model_unpickler import SafeUnpickler
--- a/ludwig_render_config.py Tue Mar 18 22:09:30 2025 +0000 +++ b/ludwig_render_config.py Sat Jun 21 15:05:21 2025 +0000 @@ -2,6 +2,7 @@ import logging import sys +import yaml from ludwig.constants import ( COMBINER, HYPEROPT, @@ -13,7 +14,6 @@ ) from ludwig.schema.model_types.utils import merge_with_defaults -import yaml logging.basicConfig(level=logging.DEBUG) LOG = logging.getLogger(__name__)
--- a/ludwig_train.py Tue Mar 18 22:09:30 2025 +0000 +++ b/ludwig_train.py Sat Jun 21 15:05:21 2025 +0000 @@ -3,13 +3,11 @@ import sys from ludwig.train import cli - from ludwig_experiment import ( convert_parquet_to_csv, generate_html_report, make_visualizations ) - from model_unpickler import SafeUnpickler
