Mercurial > repos > goeckslab > ludwig_experiment
comparison ludwig_experiment.xml @ 0:78e6686a218e draft default tip
planemo upload for repository https://github.com/goeckslab/Galaxy-Ludwig.git commit bdea9430787658783a51cc6c2ae951a01e455bb4
author | goeckslab |
---|---|
date | Tue, 07 Jan 2025 22:45:39 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:78e6686a218e |
---|---|
1 <tool id="ludwig_experiment" name="Ludwig Experiment" version="@VERSION@" profile="@PROFILE@"> | |
2 <description>trains and evaluates a model</description> | |
3 <macros> | |
4 <import>ludwig_macros.xml</import> | |
5 </macros> | |
6 <expand macro="python_requirements_gpu" /> | |
7 <expand macro="macro_stdio" /> | |
8 <version_command>echo "@VERSION@"</version_command> | |
9 <command> | |
10 <![CDATA[ | |
11 #import re | |
12 #if $config | |
13 ln -sf '$config' "./config.yml"; | |
14 #end if | |
15 | |
16 #if $dataset | |
17 | |
18 #set $sanitized_dataset = re.sub('[^\w\-_\.]', '_', $dataset.element_identifier.strip()) | |
19 ln -sf '$dataset' "./${sanitized_dataset}"; | |
20 #end if | |
21 | |
22 #if $training_set | |
23 #set $sanitized_training_set = re.sub('[^\w\-_\.]', '_', $training_set.element_identifier.strip()) | |
24 ln -sf '$training_set' "./${sanitized_training_set}"; | |
25 #end if | |
26 | |
27 #if $validation_set | |
28 | |
29 #set $sanitized_validation_set = re.sub('[^\w\-_\.]', '_', $validation_set.element_identifier.strip()) | |
30 ln -sf '$validation_set' "./${sanitized_validation_set}"; | |
31 #end if | |
32 | |
33 #if $test_set | |
34 | |
35 #set $sanitized_test_set = re.sub('[^\w\-_\.]', '_', $test_set.element_identifier.strip()) | |
36 ln -sf '$test_set' "./${sanitized_test_set}"; | |
37 #end if | |
38 | |
39 #if $raw_data | |
40 unzip -o -q '$raw_data' -d ./; | |
41 #end if | |
42 | |
43 python '$__tool_directory__/ludwig_experiment.py' | |
44 #if $config | |
45 --config "./config.yml" | |
46 #end if | |
47 #if $model_load_path | |
48 --model_load_path '$model_load_path.extra_files_path' | |
49 #end if | |
50 #if $model_resume_path | |
51 --model_resume_path '$model_resume_path.extra_files_path' | |
52 #end if | |
53 #if $dataset | |
54 --dataset "./${sanitized_dataset}" | |
55 #end if | |
56 #if $training_set | |
57 --training_set "./${sanitized_training_set}" | |
58 #end if | |
59 #if $validation_set | |
60 --validation_set "./${sanitized_validation_set}" | |
61 #end if | |
62 #if $test_set | |
63 --test_set "./${sanitized_test_set}" | |
64 #end if | |
65 #if $training_set_metadata | |
66 --training_set_metadata '$training_set_metadata' | |
67 #end if | |
68 #if $disable_parallel_threads | |
69 --disable_parallel_threads | |
70 #end if | |
71 #if $k_fold | |
72 --k_fold '$k_fold' | |
73 #end if | |
74 --output_directory "." | |
75 --data_format '$data_format' | |
76 --backend local | |
77 --eval_split '$eval_split' | |
78 --random_seed $random_seed | |
79 --skip_save_unprocessed_output | |
80 --skip_save_k_fold_split_indices && | |
81 | |
82 mkdir -p '$output_model.extra_files_path' && | |
83 cp -r experiment_run/model/*.json experiment_run/model/model_weights '$output_model.extra_files_path' && | |
84 | |
85 echo "Experiment is Done!" | |
86 ]]> | |
87 </command> | |
88 <configfiles> | |
89 <inputs name="inputs" /> | |
90 </configfiles> | |
91 <inputs> | |
92 <param argument="config" type="data" format="yaml" label="Select the dataset containing model configuration" /> | |
93 <param argument="model_load_path" type="data" format="ludwig_model" optional="true" label="Load a pretrained model as initialization" help="Optional." /> | |
94 <param argument="model_resume_path" type="data" format="ludwig_model" optional="true" label="Load a pretrained model to resume training" help="Optional." /> | |
95 <param argument="dataset" type="data" format="tabular,csv,h5,json,txt" optional="true" label="Input dataset" /> | |
96 <param argument="training_set" type="data" format="tabular,csv,h5,json" optional="true" label="Input training dataset" /> | |
97 <param argument="validation_set" type="data" format="tabular,csv,h5,json" optional="true" label="Input validation dataset" /> | |
98 <param argument="test_set" type="data" format="tabular,csv,h5,json" optional="true" label="Input test dataset" /> | |
99 <param argument="training_set_metadata" type="data" format="json" optional="true" label="Training set metadata" /> | |
100 <param argument="data_format" type="select" label="Data format"> | |
101 <option value="auto" selected="true">auto</option> | |
102 <option value="tsv">tsv</option> | |
103 <option value="csv">csv</option> | |
104 <option value="h5">h5</option> | |
105 <option value="json">json</option> | |
106 </param> | |
107 <param argument="eval_split" type="select" label="Select the split portion for evaluation"> | |
108 <option value="training">training</option> | |
109 <option value="validation">validation</option> | |
110 <option value="test" selected="true">test</option> | |
111 <option value="full">full</option> | |
112 </param> | |
113 <param argument="k_fold" type="integer" value="" optional="true" label="number of folds for a k-fold cross validation run" min="2" max="10"/> | |
114 <param argument="random_seed" type="integer" value="42" label="Randomness seed" min="0" max="999999"/> | |
115 <param argument="disable_parallel_threads" type="boolean" checked="false" label="Whether to disable parallel threads for reproducibility?" /> | |
116 <!-- <param argument="skip_save_predictions" type="boolean" checked="false" label="Whether to skip saving predictions?" /> --> | |
117 <param name="raw_data" type="data" format="zip" optional="true" label="Raw data" help="Optional. Needed for images."/> | |
118 </inputs> | |
119 <outputs> | |
120 <data format="ludwig_model" name="output_model" label="${tool.name} trained model on ${on_string}" /> | |
121 <data format="html" name="output_report" from_work_dir="ludwig_experiment_report.html" label="${tool.name} report on ${on_string}" /> | |
122 <collection type="list" name="output_pred_csv" label="${tool.name} predictions CSVs/experiment stats on ${on_string}" > | |
123 <discover_datasets pattern="(?P<designation>predictions_parquet\.csv)" format="csv" directory="experiment_run" /> | |
124 <discover_datasets pattern="(?P<designation>.+)\.json" format="json" directory="experiment_run" /> | |
125 </collection> | |
126 </outputs> | |
127 <tests> | |
128 <test expect_num_outputs="3"> | |
129 <param name="dataset" value="temperature_la.csv" ftype="csv" /> | |
130 <param name="config" value="temperature_config.yml" ftype="yaml" /> | |
131 <param name="data_format" value="csv" /> | |
132 <output name="output_report" file="ludwig_experiment_report_test.html" compare="sim_size" delta="20000" > | |
133 <assert_contents> | |
134 <has_text text="Experiment" /> | |
135 </assert_contents> | |
136 </output> | |
137 | |
138 <output_collection name="output_pred_csv" type="list" > | |
139 <element name="predictions_parquet.csv" > | |
140 <assert_contents> | |
141 <has_n_columns n="1" /> | |
142 </assert_contents> | |
143 </element> | |
144 </output_collection> | |
145 </test> | |
146 </tests> | |
147 <help> | |
148 <![CDATA[ | |
149 **What it does** | |
150 Ludwig Experiment: train on one (portion of) dataset and evaluate the model performance on another (portion of) dataset. | |
151 | |
152 | |
153 **Output** | |
154 An HTML containing the evaluation report of the trained model. | |
155 A trained Ludwig model composite dataset. | |
156 Predictions results (csv and jsons) from the model evaluations. | |
157 | |
158 | |
159 ]]> | |
160 </help> | |
161 <expand macro="macro_citations" /> | |
162 </tool> |