changeset 4:36c5fcc49286 draft default tip

planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit 4dc221b2fa9717552787f0985ad3fc3df4460158
author goeckslab
date Sat, 21 Jun 2025 15:05:41 +0000
parents 11d3377511bb
children
files Docker/galaxy_ludwig/Dockerfile Docker/galaxy_ludwig_ray_gpu/Dockerfile LICENSE README.md ludwig_autogenconfig.py ludwig_evaluate.py ludwig_experiment.py ludwig_hyperopt.py ludwig_macros.xml ludwig_predict.py ludwig_render_config.py ludwig_train.py ludwig_visualize.py
diffstat 13 files changed, 110 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Docker/galaxy_ludwig/Dockerfile	Sat Jun 21 15:05:41 2025 +0000
@@ -0,0 +1,16 @@
+FROM python:3.10-slim
+
+ARG VERSION=0.10.3
+
+RUN apt-get -y update && apt-get install -y --no-install-recommends build-essential cmake git unzip
+
+RUN pip install -U pip && \
+    pip install 'git+https://github.com/goeckslab/model-unpickler.git' && \
+    pip install 'git+https://github.com/goeckslab/smart-report.git@17df590f3ceb065add099f37b4874c85bd275014' && \
+    pip install 'ludwig[full]'==$VERSION && \
+    pip uninstall -y matplotlib && \
+    pip install matplotlib==3.8.3 && \
+    pip cache purge
+
+RUN apt-get purge -y build-essential cmake && apt-get -y autoremove && apt-get clean
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Docker/galaxy_ludwig_ray_gpu/Dockerfile	Sat Jun 21 15:05:41 2025 +0000
@@ -0,0 +1,19 @@
+FROM ludwigai/ludwig-ray-gpu
+
+USER root
+
+# there is an error with the kubernetes-xenial repo
+RUN sed -i '/^deb .*kubernetes-xenial/ s/^/#/' /etc/apt/sources.list.d/kubernetes.list || true
+
+RUN apt-get -y update && apt-get install -y unzip && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+    
+RUN pip install -U pip
+    
+RUN pip install --no-cache-dir 'git+https://github.com/goeckslab/model-unpickler.git' && \
+    pip install --no-cache-dir 'git+https://github.com/goeckslab/smart-report.git@17df590f3ceb065add099f37b4874c85bd275014'
+
+RUN useradd -m -s /bin/bash nonrootuser
+
+USER nonrootuser
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Sat Jun 21 15:05:41 2025 +0000
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 goeckslab
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Sat Jun 21 15:05:41 2025 +0000
@@ -0,0 +1,48 @@
+# Galaxy-Ludwig
+A library of Galaxy deep learning tools based on Ludwig.
+
+# Install Galaxy-Ludwig into Galaxy
+We assume that you have Galaxy running and Docker installed on your server/laptop.
+* Create a new folder named `ludwig` (or any name you prefer) under Galaxy’s tools folder.
+* Select and download the branch you want to install and use. Copy all XML and Python (`.py`) files under the tools folder in this repo to the `ludwig` folder (the folder you created in the previous step).
+* Update `tool_conf.xml` to include Galaxy-Ludwig. See [documentation](https://docs.galaxyproject.org/en/master/admin/tool_panel.html) for more details. This is an example:
+```
+<section id="ludwig" name="Ludwig Applications">
+  <tool file="ludwig/ludwig_evaluate.xml" />
+  <tool file="ludwig/ludwig_experiment.xml" />
+  <tool file="ludwig/ludwig_hyperopt.xml" />
+  <tool file="ludwig/ludwig_predict.xml" />
+  <tool file="ludwig/ludwig_render_config.xml" />
+  <tool file="ludwig/ludwig_train.xml" />
+  <tool file="ludwig/ludwig_visualize.xml" />
+</section>
+```
+
+
+* This is an example of a `job_conf.yml` file that you can create to enable Docker for a local Galaxy instance where you want Ludwig-related jobs to run:
+
+```
+runners:
+  local:
+    load: galaxy.jobs.runners.local:LocalJobRunner
+    workers: 4
+execution:
+  default: local
+  environments:
+    local:
+      runner: local
+      docker_enabled: true
+```
+If you are using an older version of Galaxy, configure `job_conf.xml` instead of `job_conf.yml`. In that case, you configure a destination instead of the execution and environment settings.
+See [documentation](https://docs.galaxyproject.org/en/master/admin/jobs.html#running-jobs-in-containers) for job_conf configuration. 
+* If you haven’t already set `sanitize_all_html: false` in `galaxy.yml`, please do so to enable our HTML report functionality.
+
+# Get Galaxy-Ludwig docker image
+
+This step is optional.
+If you want to speed up your runs, execute the following command:
+```
+docker pull quay.io/goeckslab/galaxy-ludwig-gpu:0.10.1
+```
+
+* You should now be good to go.
--- a/ludwig_autogenconfig.py	Tue Mar 18 20:52:00 2025 +0000
+++ b/ludwig_autogenconfig.py	Sat Jun 21 15:05:41 2025 +0000
@@ -3,7 +3,6 @@
 
 from ludwig import automl
 from ludwig.utils import defaults
-
 from pandas import read_csv
 
 logging.basicConfig(level=logging.DEBUG)
@@ -39,7 +38,7 @@
     # get the output feature name
     df = read_csv(args.dataset, nrows=2, sep=None, engine='python')
     names = df.columns.tolist()
-    target = names[args.output_feature-1]
+    target = names[args.output_feature - 1]
 
     args_init = ["--dataset", args.dataset,
                  "--target", target,
--- a/ludwig_evaluate.py	Tue Mar 18 20:52:00 2025 +0000
+++ b/ludwig_evaluate.py	Sat Jun 21 15:05:41 2025 +0000
@@ -3,10 +3,8 @@
 import sys
 
 from ludwig.evaluate import cli
-
 from ludwig_experiment import convert_parquet_to_csv, \
     generate_html_report, make_visualizations
-
 from model_unpickler import SafeUnpickler
 
 
--- a/ludwig_experiment.py	Tue Mar 18 20:52:00 2025 +0000
+++ b/ludwig_experiment.py	Sat Jun 21 15:05:41 2025 +0000
@@ -4,6 +4,7 @@
 import pickle
 import sys
 
+import pandas as pd
 from ludwig.experiment import cli
 from ludwig.globals import (
     DESCRIPTION_FILE_NAME,
@@ -13,11 +14,7 @@
 )
 from ludwig.utils.data_utils import get_split_path
 from ludwig.visualize import get_visualizations_registry
-
 from model_unpickler import SafeUnpickler
-
-import pandas as pd
-
 from utils import (
     encode_image_to_base64,
     get_html_closing,
@@ -35,7 +32,7 @@
 output_directory = None
 for ix, arg in enumerate(sys.argv):
     if arg == "--output_directory":
-        output_directory = sys.argv[ix+1]
+        output_directory = sys.argv[ix + 1]
         break
 
 viz_output_directory = os.path.join(output_directory, "visualizations")
--- a/ludwig_hyperopt.py	Tue Mar 18 20:52:00 2025 +0000
+++ b/ludwig_hyperopt.py	Sat Jun 21 15:05:41 2025 +0000
@@ -8,9 +8,7 @@
 )
 from ludwig.hyperopt_cli import cli
 from ludwig.visualize import get_visualizations_registry
-
 from model_unpickler import SafeUnpickler
-
 from utils import (
     encode_image_to_base64,
     get_html_closing,
@@ -84,7 +82,7 @@
 output_directory = None
 for ix, arg in enumerate(sys.argv):
     if arg == "--output_directory":
-        output_directory = sys.argv[ix+1]
+        output_directory = sys.argv[ix + 1]
         break
 
 hyperopt_stats_path = os.path.join(
--- a/ludwig_macros.xml	Tue Mar 18 20:52:00 2025 +0000
+++ b/ludwig_macros.xml	Sat Jun 21 15:05:41 2025 +0000
@@ -1,7 +1,7 @@
 <macros>
     <token name="@LUDWIG_VERSION@">0.10.1</token>
 
-    <token name="@SUFFIX@">0</token>
+    <token name="@SUFFIX@">1</token>
 
     <token name="@VERSION@">@LUDWIG_VERSION@+@SUFFIX@</token>
 
--- a/ludwig_predict.py	Tue Mar 18 20:52:00 2025 +0000
+++ b/ludwig_predict.py	Sat Jun 21 15:05:41 2025 +0000
@@ -3,9 +3,7 @@
 import sys
 
 from ludwig.predict import cli
-
 from ludwig_experiment import convert_parquet_to_csv
-
 from model_unpickler import SafeUnpickler
 
 
--- a/ludwig_render_config.py	Tue Mar 18 20:52:00 2025 +0000
+++ b/ludwig_render_config.py	Sat Jun 21 15:05:41 2025 +0000
@@ -2,6 +2,7 @@
 import logging
 import sys
 
+import yaml
 from ludwig.constants import (
     COMBINER,
     HYPEROPT,
@@ -13,7 +14,6 @@
 )
 from ludwig.schema.model_types.utils import merge_with_defaults
 
-import yaml
 
 logging.basicConfig(level=logging.DEBUG)
 LOG = logging.getLogger(__name__)
--- a/ludwig_train.py	Tue Mar 18 20:52:00 2025 +0000
+++ b/ludwig_train.py	Sat Jun 21 15:05:41 2025 +0000
@@ -3,13 +3,11 @@
 import sys
 
 from ludwig.train import cli
-
 from ludwig_experiment import (
     convert_parquet_to_csv,
     generate_html_report,
     make_visualizations
 )
-
 from model_unpickler import SafeUnpickler
 
 
--- a/ludwig_visualize.py	Tue Mar 18 20:52:00 2025 +0000
+++ b/ludwig_visualize.py	Sat Jun 21 15:05:41 2025 +0000
@@ -3,7 +3,6 @@
 import sys
 
 from ludwig.visualize import cli
-
 from model_unpickler import SafeUnpickler