comparison ludwig_train.xml @ 0:f0be10937f5c draft default tip

planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
author goeckslab
date Tue, 07 Jan 2025 22:44:09 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f0be10937f5c
1 <tool id="ludwig_train" name="Ludwig Train" version="@VERSION@" profile="@PROFILE@">
2 <description>trains a deep learning model</description> <!-- One-line summary of the tool. -->
3 <macros> <!-- Shared definitions are imported from a separate macro file. -->
4 <import>ludwig_macros.xml</import> <!-- NOTE(review): presumably defines the @VERSION@ and @PROFILE@ tokens and the macros expanded in this file; confirm. -->
5 </macros>
6 <expand macro="python_requirements_gpu" /> <!-- Macro body is not visible here; by its name it supplies the (GPU) Python requirements; verify in ludwig_macros.xml. -->
7 <expand macro="macro_stdio" /> <!-- Macro body is not visible here; presumably the stdio / exit code handling; verify in ludwig_macros.xml. -->
8 <version_command>echo "@VERSION@"</version_command> <!-- Reports the tool version by echoing the @VERSION@ token. -->
9 <!-- Stages the selected inputs in the job directory, runs ludwig_train.py, then copies the model and report artifacts into the extra-files directories of the two outputs. --> <command>
10 <![CDATA[
11 #import re
12 #if $config
13 cp '$config' "./config.yml" &&
14 #end if
15 ## Each supplied dataset is symlinked under a sanitized copy of its element identifier: every character that is not a word character, "-", "_" or "." is replaced by "_". Staging steps are chained with "&&" so a failed copy or link stops the job before training starts.
16 #if $dataset
17 #set $sanitized_dataset = re.sub('[^\w\-_\.]', '_', $dataset.element_identifier.strip())
18 ln -sf '$dataset' "./${sanitized_dataset}" &&
19 #end if
20
21 #if $training_set
22 #set $sanitized_training_set = re.sub('[^\w\-_\.]', '_', $training_set.element_identifier.strip())
23 ln -sf '$training_set' "./${sanitized_training_set}" &&
24 #end if
25
26 #if $validation_set
27 #set $sanitized_validation_set = re.sub('[^\w\-_\.]', '_', $validation_set.element_identifier.strip())
28 ln -sf '$validation_set' "./${sanitized_validation_set}" &&
29 #end if
30
31 #if $test_set
32 #set $sanitized_test_set = re.sub('[^\w\-_\.]', '_', $test_set.element_identifier.strip())
33 ln -sf '$test_set' "./${sanitized_test_set}" &&
34 #end if
35 ## The raw-data archive is unpacked best-effort (";" rather than "&&") because unzip exits with status 1 when it only emitted warnings, and that must not abort the run.
36 #if $raw_data
37 unzip -o -q '$raw_data' -d ./;
38 #end if
39 ## Run the training script; each optional flag is emitted only when the corresponding input was supplied.
40 python '$__tool_directory__/ludwig_train.py'
41 #if $config
42 --config "./config.yml"
43 #end if
44 #if $model_load_path
45 --model_load_path '${model_load_path.extra_files_path}'
46 #end if
47 #if $dataset
48 --dataset "./${sanitized_dataset}"
49 #end if
50 #if $training_set
51 --training_set "./${sanitized_training_set}"
52 #end if
53 #if $validation_set
54 --validation_set "./${sanitized_validation_set}"
55 #end if
56 #if $test_set
57 --test_set "./${sanitized_test_set}"
58 #end if
59 #if $training_set_metadata
60 --training_set_metadata '$training_set_metadata'
61 #end if
62 #if $disable_parallel_threads
63 --disable_parallel_threads
64 #end if
65 --output_directory "."
66 --data_format '$data_format'
67 --random_seed $random_seed
68 --backend local &&
69 ## Copy the model JSON files and weights into the model output's extra-files directory, then the run JSON files and the visualizations directory into the report output's extra-files directory.
70 mkdir -p '${output_model.extra_files_path}' &&
71 cp -r experiment_run/model/*.json experiment_run/model/model_weights '${output_model.extra_files_path}' &&
72 mkdir -p '${output_report.extra_files_path}' &&
73 cp experiment_run/*.json '${output_report.extra_files_path}' &&
74 cp -r visualizations '${output_report.extra_files_path}' &&
75
76 echo "Training is Done!"
77 ]]>
78 </command>
79 <configfiles> <!-- Extra files generated for the job from the tool parameters. -->
80 <inputs name="inputs" /> <!-- NOTE(review): declares an inputs file named "inputs", but nothing in the command template of this file references $inputs; confirm whether it is still needed. -->
81 </configfiles>
82 <inputs> <!-- User-facing parameters: only "config" is mandatory; every other dataset input is declared optional. -->
83 <param name="config" type="data" format="yaml" label="Select the dataset containing model configuration" /> <!-- Copied to ./config.yml by the command. -->
84 <param name="model_load_path" type="data" format="ludwig_model" optional="true" label="Load a pretrained model as initialization" help="Optional." /> <!-- Its extra-files directory is what gets passed to the script. -->
85 <param name="dataset" type="data" format="tabular,csv,h5,json,txt" optional="true" label="Input dataset" />
86 <param name="training_set" type="data" format="tabular,csv,h5,json" optional="true" label="Input training dataset" />
87 <param name="validation_set" type="data" format="tabular,csv,h5,json" optional="true" label="Input validation dataset" />
88 <param name="test_set" type="data" format="tabular,csv,h5,json" optional="true" label="Input test dataset" />
89 <param name="training_set_metadata" type="data" format="json" optional="true" label="Training set metadata" />
90 <param name="data_format" type="select" label="Data format"> <!-- The selected value is forwarded as the data_format option of ludwig_train.py. -->
91 <option value="auto" selected="true">auto</option>
92 <option value="tsv">tsv</option>
93 <option value="csv">csv</option>
94 <option value="h5">h5</option>
95 <option value="json">json</option>
96 </param>
97 <param name="random_seed" type="integer" value="42" label="Randomness seed" min="0" max="999999" /> <!-- Defaults to 42; accepted range is 0 to 999999. -->
98 <param name="disable_parallel_threads" type="boolean" checked="false" label="Whether to disable parallel threads for reproducibility?" /> <!-- When checked, the command adds the disable_parallel_threads flag. -->
99 <param name="raw_data" type="data" format="zip" optional="true" label="Raw data" help="Optional. Needed for images."/> <!-- Zip archive that the command unpacks into the working directory. -->
100 </inputs>
101 <outputs> <!-- Trained model, HTML report, and a list collection built from the .json files discovered in experiment_run. -->
102 <data name="output_model" format="ludwig_model" label="${tool.name} model on ${on_string}"/>
103 <data name="output_report" format="html" label="${tool.name} report on ${on_string}" from_work_dir="ludwig_train_report.html"/>
104 <collection name="output_pred_csv" type="list" label="${tool.name} training stat on ${on_string}">
105 <discover_datasets directory="experiment_run" pattern="(?P&lt;designation&gt;.+)\.json" format="json"/>
106 </collection>
107 </outputs>
108 <tests> <!-- Single test: train on the temperature CSV with its YAML config and check the generated HTML report. -->
109 <test>
110 <param name="config" ftype="yaml" value="temperature_config.yml"/>
111 <param name="dataset" ftype="csv" value="temperature_la.csv"/>
112 <param name="data_format" value="csv"/>
113 <output name="output_report" compare="sim_size" delta="10" file="ludwig_train_report_test.html">
114 <assert_contents>
115 <has_text text="Visualizations"/>
116 </assert_contents>
117 </output>
118 </test>
119 </tests>
120 <help>
121 <![CDATA[
122 **What it does**
123 Trains a deep learning model with Ludwig, using the selected YAML configuration and the dataset(s) provided.
124
125
126 **Output**
127 One trained model, stored as a composite dataset of type ludwig_model.
128 One HTML file containing the training report, plus a list collection of the JSON training statistics files.
129
130
131 ]]>
132 </help>
133 <expand macro="macro_citations" /> <!-- Macro body is not visible in this file; presumably expands to the citations section; verify in ludwig_macros.xml. -->
134 </tool>