Mercurial > repos > immuneml > immuneml_tools
annotate immuneml_train_ml_model.xml @ 19:051d349fdc8c draft default tip
"planemo upload commit 5ffe9db26c26d30c923c812b69346d95948e9cd0"
author | immuneml |
---|---|
date | Tue, 05 Apr 2022 10:11:52 +0000 |
parents | cd57c1c66f8b |
children |
rev | line source |
---|---|
0
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
1 <tool id="immuneml_train_ml_model" name="Train machine learning models" version="@VERSION@.0"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
2 <description></description> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
3 <macros> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
4 <import>prod_macros.xml</import> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
5 </macros> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
6 <expand macro="requirements" /> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
7 <command><![CDATA[ |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
8 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
9 #if $iml_input |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
10 cp -r ${iml_input.extra_files_path}/result/* . && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
11 (mv repertoires/* . &>/dev/null || :) && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
12 rm -rf repertoires && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
13 #end if |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
14 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
15 #set $input_orig_names = [] |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
16 #if $data_input |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
17 #for $input in $data_input |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
18 #set input_orig_names += [str($input.element_identifier)] |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
19 ([ -e ./"$input.element_identifier" ] && echo "File '$input.element_identifier' already exists in the input folder, skipping." || ln -s $input "$input.element_identifier") && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
20 #end for# |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
21 #end if |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
22 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
23 cp "$yaml_input" yaml_copy && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
24 immune-ml ./yaml_copy ${html_outfile.files_path} --tool GalaxyTrainMLModel && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
25 mv ${html_outfile.files_path}/index.html ${html_outfile} && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
26 mv ${html_outfile.files_path}/exported_models/*.zip ${optimal_model} && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
27 mv ${html_outfile.files_path}/immuneML_output.zip $archive |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
28 ]]> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
29 </command> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
30 <inputs> |
16
cd57c1c66f8b
"planemo upload commit 60f280176ecdc2883fc7e85deb9aaa151f7c2088"
immuneml
parents:
15
diff
changeset
|
31 <param name="iml_input" type="data" format="immuneml_receptors" label="immuneML dataset" optional="true" help="This field accepts an immuneML dataset, as created by the Create Dataset tool."/> |
0
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
32 <param name="yaml_input" type="data" format="txt" label="YAML specification" multiple="false"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
33 <param name="data_input" type="data" multiple="true" label="Additional files" optional="true" help="This field should include individual repertoire files, metadata files, receptor data and others."/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
34 </inputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
35 <outputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
36 <data format="zip" name="optimal_model" label="optimal_ml_settings.zip"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
37 <data format="zip" name="archive" label="Archive: ML model training"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
38 <data format="html" name="html_outfile" label="Summary: ML model training"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
39 </outputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
40 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
41 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
42 <help> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
43 <![CDATA[ |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
44 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
45 This tool can be used to run hyperparameter optimization over several different ML settings, |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
46 which include ML models and their parameters, encodings and preprocessing steps. Nested cross-validation is used to identify the optimal combination of ML settings. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
47 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
48 This is a YAML-based Galaxy tool. If you prefer a button-based interface that assumes less ML knowledge, |
7
45ca02982e1f
"planemo upload commit 8aef44a2b3bc8fc00a1efe0ce7ecab83eded053f-dirty"
immuneml
parents:
6
diff
changeset
|
49 see `Train immune receptor classifiers (easy interface) <root?tool_id=immuneml_train_classifiers>`_ and |
45ca02982e1f
"planemo upload commit 8aef44a2b3bc8fc00a1efe0ce7ecab83eded053f-dirty"
immuneml
parents:
6
diff
changeset
|
50 `Train immune repertoire classifiers (easy interface) <root?tool_id=novice_immuneml_interface>`_. |
0
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
51 |
6
2d3dd9ff7e84
"planemo upload commit 74f2bd15d2b7723c8e5a22d743913706dc7d8333-dirty"
immuneml
parents:
3
diff
changeset
|
52 For more details on how to train ML models in Galaxy, see `the documentation <https://docs.immuneml.uio.no/latest/galaxy/galaxy_train_ml_models.html>`_. |
0
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
53 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
54 **Tool output** |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
55 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
56 This Galaxy tool will produce the following history elements: |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
57 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
58 - Summary: ML model training: an HTML page that allows you to browse through all results, including prediction accuracies on |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
59 the various data splits and report results. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
60 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
61 - Archive: ML model training: a .zip file containing the complete output folder as it was produced by immuneML. This folder |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
62 contains the output of the TrainMLModel instruction including all trained models and their predictions, and report results. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
63 Furthermore, the folder contains the complete YAML specification file for the immuneML run, the HTML output and a log file. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
64 |
19
051d349fdc8c
"planemo upload commit 5ffe9db26c26d30c923c812b69346d95948e9cd0"
immuneml
parents:
16
diff
changeset
|
65 - optimal_ml_settings.zip: a .zip file containing the raw files for the optimal trained ML settings (ML model, encoding). This .zip file can subsequently be used as an input when applying previously trained ML models to a new dataset. Currently, this can only be done locally using the command-line interface. |
0
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
66 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
67 ]]> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
68 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
69 </help> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
70 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
71 </tool> |