Mercurial > repos > goeckslab > ludwig_hyperopt

diff ludwig_hyperopt.xml @ 0:84f32596712d draft
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
author: goeckslab
date: Tue, 07 Jan 2025 22:44:34 +0000
children: b486ca953371
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ludwig_hyperopt.xml	Tue Jan 07 22:44:34 2025 +0000
@@ -0,0 +1,139 @@
+<tool id="ludwig_hyperopt" name="Ludwig Hyperopt" version="@VERSION@" profile="@PROFILE@">
+    <description>searches for optimal Hyperparameters</description>
+    <macros>
+        <import>ludwig_macros.xml</import>
+    </macros>
+    <expand macro="python_requirements_gpu" />
+    <expand macro="macro_stdio" />
+    <version_command>echo "@VERSION@"</version_command>
+    <command>
+        <![CDATA[
+            TMPDIR='/tmp';
+            TMP=\$TMPDIR;
+            TEMP=\$TMPDIR;
+            export TMPDIR TMP TEMP &&
+            #import re
+            #if $config
+            cp '$config' "./config.yml";
+            #end if
+
+            #if $dataset
+            #set $sanitized_dataset = re.sub('[^\w\-_\.]', '_', $dataset.element_identifier.strip())
+            ln -sf '$dataset' "./${sanitized_dataset}";
+            #end if
+
+            #if $training_set
+            #set $sanitized_training_set = re.sub('[^\w\-_\.]', '_', $training_set.element_identifier.strip())
+            ln -sf '$training_set' "./${sanitized_training_set}";
+            #end if
+
+            #if $validation_set
+            #set $sanitized_validation_set = re.sub('[^\w\-_\.]', '_', $validation_set.element_identifier.strip())
+            ln -sf '$validation_set' "./${sanitized_validation_set}";
+            #end if
+
+            #if $test_set
+            #set $sanitized_test_set = re.sub('[^\w\-_\.]', '_', $test_set.element_identifier.strip())
+            ln -sf '$test_set' "./${sanitized_test_set}";
+            #end if
+
+            #if $raw_data
+                unzip -o -q '$raw_data' -d ./;
+            #end if
+
+            python '$__tool_directory__/ludwig_hyperopt.py'
+                #if $config
+                --config "./config.yml"
+                #end if
+                #if $model_load_path
+                --model_load_path '$model_load_path.extra_files_path'
+                #end if
+                #if $dataset
+                --dataset "./${sanitized_dataset}"
+                #end if
+                #if $training_set
+                --training_set "./${sanitized_training_set}"
+                #end if
+                #if $validation_set
+                --validation_set "./${sanitized_validation_set}"
+                #end if
+                #if $test_set
+                --test_set "./${sanitized_test_set}"
+                #end if
+                #if $training_set_metadata
+                --training_set_metadata '$training_set_metadata'
+                #end if
+                ## #if not $save_model
+                ## --skip_save_model
+                ## #end if
+                --output_directory "."
+                --data_format '$data_format'
+                --random_seed $random_seed
+                --backend local
+                --skip_save_progress &&
+
+            hyperopt_stats_path='hyperopt_statistics.json' &&
+            mkdir -p '$output_hyperopt.extra_files_path' &&
+            cp "hyperopt/\$hyperopt_stats_path" '$output_hyperopt.extra_files_path' &&
+            cp -r visualizations '$output_hyperopt.extra_files_path' &&
+            best_trial=\$(cat ../outputs/tool_stdout | grep "^Current best trial:" | tail -1 | cut -d" " -f 4) &&
+            mkdir -p '$output_model.extra_files_path' &&
+            cp hyperopt/trial_\$best_trial/hyperopt_run/model/*.json hyperopt/trial_\$best_trial/hyperopt_run/model/model_weights '$output_model.extra_files_path'
+        ]]>
+    </command>
+    <configfiles>
+        <inputs name="inputs" />
+    </configfiles>
+    <inputs>
+        <param name="config" type="data" format="yaml" label="Select the dataset containing model configuration" />
+        <param name="model_load_path" type="data" format="ludwig_model" optional="true" label="Load a pretrained model as initialization" help="Optional." />
+        <param name="dataset" type="data" format="tabular,csv,h5,json,txt" optional="true" label="Input dataset" />
+        <param name="training_set" type="data" format="tabular,csv,h5,json" optional="true" label="Input traning dataset" />
+        <param name="validation_set" type="data" format="tabular,csv,h5,json" optional="true" label="Input validation dataset" />
+        <param name="test_set" type="data" format="tabular,csv,h5,json" optional="true" label="Input test dataset" />
+        <param name="training_set_metadata" type="data" format="json" optional="true" label="Training set metadata" />
+        <param name="data_format" type="select" label="Data format">
+            <option value="auto" selected="true">auto</option>
+            <option value="tsv">tsv</option>
+            <option value="csv">csv</option>
+            <option value="h5">h5</option>
+            <option value="json">json</option>
+        </param>
+        <param name="random_seed" type="integer" value="42" label="Randomness seed" min="0" max="999999" />
+        <!-- <param name="save_model" type="boolean" checked="true" label="Whether to save model?" /> -->
+        <param name="raw_data" type="data" format="zip" optional="true" label="Raw data" help="Optional. Needed for images."/>
+    </inputs>       
+    <outputs>
+        <data format="ludwig_model" name="output_model" label="${tool.name} best model files on ${on_string}" >
+        </data>
+        <data format="html" name="output_hyperopt" from_work_dir="ludwig_hyperopt_report.html" label="${tool.name} report on ${on_string}" />
+        <data format="json" name="output_hyperopt_stats" from_work_dir="hyperopt/hyperopt_statistics.json" label="${tool.name} statistics on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="dataset" value="temperature_la.csv" ftype="csv" />
+            <param name="config" value="temperature_hyperopt.yml" ftype="yaml" />
+            <param name="data_format" value="csv" />
+            <output name="output_hyperopt" file="ludwig_hyperopt_report_test.html" compare="sim_size" delta="20000" >
+                <assert_contents>
+                    <has_text text="Hyperopt" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+            **What it does**
+            Hyperparameter tuning.
+
+            **Input**
+
+            **Output**
+
+            - Hyperopt report
+            - The best Ludwig model
+
+        ]]>
+    </help>
+    <expand macro="macro_citations" />
+</tool>
author	goeckslab
date	Tue, 07 Jan 2025 22:44:34 +0000
parents
children	b486ca953371