Mercurial > repos > goeckslab > ludwig_train
view ludwig_train.xml @ 0:f0be10937f5c draft default tip
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
| author | goeckslab |
|---|---|
| date | Tue, 07 Jan 2025 22:44:09 +0000 |
| parents | |
| children | |
line wrap: on
line source
<tool id="ludwig_train" name="Ludwig Train" version="@VERSION@" profile="@PROFILE@">
    <!--
        Galaxy tool wrapper that runs ludwig_train.py on the selected datasets and
        collects a model dataset, an HTML report and the JSON files written to
        experiment_run.
        NOTE(review): the @VERSION@ and @PROFILE@ tokens and the expanded macros are
        not defined in this file; presumably they come from ludwig_macros.xml.
    -->
    <description>trains a deep learning model</description>
    <macros>
        <import>ludwig_macros.xml</import>
    </macros>
    <expand macro="python_requirements_gpu" />
    <expand macro="macro_stdio" />
    <version_command>echo "@VERSION@"</version_command>
    <!--
        The command has three stages: stage the inputs in the working directory,
        call ludwig_train.py, then copy the results into the extra_files_path
        directories of the two outputs.
    -->
    <command>
        <![CDATA[
            #import re

            ## Stage the configuration under a fixed name in the working directory.
            #if $config
                cp '$config' "./config.yml";
            #end if

            ## Link every supplied dataset under a name derived from its element
            ## identifier. Any character other than a word character, '-', '_' or '.'
            ## is replaced by '_' before the name is used on the command line.
            #if $dataset
                #set $sanitized_dataset = re.sub('[^\w\-_\.]', '_', $dataset.element_identifier.strip())
                ln -sf '$dataset' "./${sanitized_dataset}";
            #end if
            #if $training_set
                #set $sanitized_training_set = re.sub('[^\w\-_\.]', '_', $training_set.element_identifier.strip())
                ln -sf '$training_set' "./${sanitized_training_set}";
            #end if
            #if $validation_set
                #set $sanitized_validation_set = re.sub('[^\w\-_\.]', '_', $validation_set.element_identifier.strip())
                ln -sf '$validation_set' "./${sanitized_validation_set}";
            #end if
            #if $test_set
                #set $sanitized_test_set = re.sub('[^\w\-_\.]', '_', $test_set.element_identifier.strip())
                ln -sf '$test_set' "./${sanitized_test_set}";
            #end if

            ## Unpack the optional zip archive into the working directory.
            #if $raw_data
                unzip -o -q '$raw_data' -d ./;
            #end if

            ## Run the training script; each optional flag is emitted only when the
            ## matching input was supplied.
            python '$__tool_directory__/ludwig_train.py'
            #if $config
                --config "./config.yml"
            #end if
            #if $model_load_path
                --model_load_path '${model_load_path.extra_files_path}'
            #end if
            #if $dataset
                --dataset "./${sanitized_dataset}"
            #end if
            #if $training_set
                --training_set "./${sanitized_training_set}"
            #end if
            #if $validation_set
                --validation_set "./${sanitized_validation_set}"
            #end if
            #if $test_set
                --test_set "./${sanitized_test_set}"
            #end if
            #if $training_set_metadata
                --training_set_metadata '$training_set_metadata'
            #end if
            #if $disable_parallel_threads
                --disable_parallel_threads
            #end if
            --output_directory "."
            --data_format '$data_format'
            --random_seed $random_seed
            --backend local

            ## Copy the model files into the model output, then the run-level JSON
            ## files and the visualizations directory into the report output. Each
            ## step only runs when the previous one succeeded.
            && mkdir -p '${output_model.extra_files_path}'
            && cp -r experiment_run/model/*.json experiment_run/model/model_weights '${output_model.extra_files_path}'
            && mkdir -p '$output_report.extra_files_path'
            && cp experiment_run/*.json '$output_report.extra_files_path'
            && cp -r visualizations '$output_report.extra_files_path'
            && echo "Training is Done!"
        ]]>
    </command>
    <configfiles>
        <inputs name="inputs" />
    </configfiles>
    <inputs>
        <param name="config" type="data" format="yaml" label="Select the dataset containing model configuration" />
        <param name="model_load_path" type="data" format="ludwig_model" optional="true" label="Load a pretrained model as initialization" help="Optional." />
        <param name="dataset" type="data" format="tabular,csv,h5,json,txt" optional="true" label="Input dataset" />
        <param name="training_set" type="data" format="tabular,csv,h5,json" optional="true" label="Input training dataset" />
        <param name="validation_set" type="data" format="tabular,csv,h5,json" optional="true" label="Input validation dataset" />
        <param name="test_set" type="data" format="tabular,csv,h5,json" optional="true" label="Input test dataset" />
        <param name="training_set_metadata" type="data" format="json" optional="true" label="Training set metadata" />
        <param name="data_format" type="select" label="Data format">
            <option value="auto" selected="true">auto</option>
            <option value="tsv">tsv</option>
            <option value="csv">csv</option>
            <option value="h5">h5</option>
            <option value="json">json</option>
        </param>
        <param name="random_seed" type="integer" value="42" label="Randomness seed" min="0" max="999999" />
        <param name="disable_parallel_threads" type="boolean" checked="false" label="Whether to disable parallel threads for reproducibility?" />
        <param name="raw_data" type="data" format="zip" optional="true" label="Raw data" help="Optional. Needed for images." />
    </inputs>
    <outputs>
        <data format="ludwig_model" name="output_model" label="${tool.name} model on ${on_string}" />
        <data format="html" name="output_report" from_work_dir="ludwig_train_report.html" label="${tool.name} report on ${on_string}" />
        <collection type="list" name="output_pred_csv" label="${tool.name} training stat on ${on_string}">
            <!--
                The angle brackets of the named group are written as entities because
                a literal less-than sign is not allowed inside an attribute value.
            -->
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.json" format="json" directory="experiment_run" />
        </collection>
    </outputs>
    <tests>
        <test>
            <param name="dataset" value="temperature_la.csv" ftype="csv" />
            <param name="config" value="temperature_config.yml" ftype="yaml" />
            <param name="data_format" value="csv" />
            <output name="output_report" file="ludwig_train_report_test.html" compare="sim_size" delta="10">
                <assert_contents>
                    <has_text text="Visualizations" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
        <![CDATA[
**What it does**

Train a model.

**Output**

One trained ludwig_model type composite dataset.

One html containing the training report.
        ]]>
    </help>
    <expand macro="macro_citations" />
</tool>