Mercurial > repos > immuneml > immuneml_tools
annotate immuneml_train_ml_model.xml @ 0:629e7e403e19 draft
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
author | immuneml |
---|---|
date | Thu, 01 Jul 2021 11:36:43 +0000 |
parents | |
children | ed3932e6d616 |
rev | line source |
---|---|
0
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
1 <tool id="immuneml_train_ml_model" name="Train machine learning models" version="@VERSION@.0"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
2 <description></description> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
3 <macros> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
4 <import>prod_macros.xml</import> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
5 </macros> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
6 <expand macro="requirements" /> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
7 <command><![CDATA[ |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
8 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
9 #if $iml_input |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
10 cp -r '${iml_input.extra_files_path}'/result/* . && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
11 (mv repertoires/* . &>/dev/null || :) && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
12 rm -rf repertoires && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
13 #end if |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
14 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
15 #set $input_orig_names = [] |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
16 #if $data_input |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
17 #for $input in $data_input |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
18 #set input_orig_names += [str($input.element_identifier)] |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
19 ([ -e ./"$input.element_identifier" ] && echo "File '$input.element_identifier' already exists in the input folder, skipping." || ln -s '$input' "$input.element_identifier") && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
20 #end for# |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
21 #end if |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
22 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
23 cp "$yaml_input" yaml_copy && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
24 immune-ml ./yaml_copy ${html_outfile.files_path} --tool GalaxyTrainMLModel && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
25 mv ${html_outfile.files_path}/index.html ${html_outfile} && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
26 mv ${html_outfile.files_path}/exported_models/*.zip ${optimal_model} && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
27 mv ${html_outfile.files_path}/immuneML_output.zip $archive |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
28 ]]> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
29 </command> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
30 <inputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
31 <param name="yaml_input" type="data" format="txt" label="YAML specification" multiple="false"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
32 <param name="data_input" type="data" multiple="true" label="Additional files" optional="true" help="This field should include individual repertoire files, metadata files, receptor data and others."/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
33 <param name="iml_input" type="data" format="iml_dataset" label="Dataset input" optional="true" help="This field accepts an ImmuneML dataset, as created by the Create Dataset tool."/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
34 </inputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
35 <outputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
36 <data format="zip" name="optimal_model" label="optimal_ml_settings.zip"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
37 <data format="zip" name="archive" label="Archive: ML model training"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
38 <data format="html" name="html_outfile" label="Summary: ML model training"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
39 </outputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
40 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
41 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
42 <help> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
43 <![CDATA[ |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
44 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
45 This tool can be used to run hyperparameter optimization over several different ML settings, |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
46 which include ML models and their parameters, encodings and preprocessing steps. Nested cross-validation is used to identify the optimal combination of ML settings. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
47 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
48 This is a YAML-based Galaxy tool. If you prefer a button-based interface that assumes less ML knowledge, |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
49 see `Train immune receptor classifiers (easy interface) <https://galaxy.immuneml.uio.no/root?tool_id=immuneml_train_classifiers>`_ and |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
50 `Train immune repertoire classifiers (easy interface) <https://galaxy.immuneml.uio.no/root?tool_id=novice_immuneml_interface>`_. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
51 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
52 For more details on how to train ML models in Galaxy, see `the documentation <https://docs.immuneml.uio.no/galaxy/galaxy_train_ml_models.html>`_. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
53 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
54 **Tool output** |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
55 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
56 This Galaxy tool will produce the following history elements: |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
57 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
58 - Summary: ML model training: an HTML page that allows you to browse through all results, including prediction accuracies on |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
59 the various data splits and report results. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
60 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
61 - Archive: ML model training: a .zip file containing the complete output folder as it was produced by immuneML. This folder |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
62 contains the output of the TrainMLModel instruction including all trained models and their predictions, and report results. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
63 Furthermore, the folder contains the complete YAML specification file for the immuneML run, the HTML output and a log file. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
64 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
65 - optimal_ml_settings.zip: a .zip file containing the raw files for the optimal trained ML settings (ML model, encoding, and |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
66 optionally preprocessing steps). This .zip file can subsequently be used as an input when `applying previously trained ML models to a new AIRR dataset in Galaxy <https://docs.immuneml.uio.no/galaxy/galaxy_apply_ml_models.html>`_. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
67 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
68 ]]> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
69 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
70 </help> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
71 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
72 </tool> |