annotate immuneml_train_ml_model.xml @ 19:051d349fdc8c draft default tip

"planemo upload commit 5ffe9db26c26d30c923c812b69346d95948e9cd0"
author immuneml
date Tue, 05 Apr 2022 10:11:52 +0000
parents cd57c1c66f8b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
1 <tool id="immuneml_train_ml_model" name="Train machine learning models" version="@VERSION@.0">
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
2 <description></description>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
3 <macros>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
4 <import>prod_macros.xml</import>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
5 </macros>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
6 <expand macro="requirements" />
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
7 <command><![CDATA[
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
8
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
9 #if $iml_input
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
10 cp -r ${iml_input.extra_files_path}/result/* . &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
11 (mv repertoires/* . &>/dev/null || :) &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
12 rm -rf repertoires &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
13 #end if
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
14
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
15 #set $input_orig_names = []
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
16 #if $data_input
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
17 #for $input in $data_input
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
18 #set input_orig_names += [str($input.element_identifier)]
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
19 ([ -e ./"$input.element_identifier" ] && echo "File '$input.element_identifier' already exists in the input folder, skipping." || ln -s $input "$input.element_identifier") &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
20 #end for#
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
21 #end if
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
22
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
23 cp "$yaml_input" yaml_copy &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
24 immune-ml ./yaml_copy ${html_outfile.files_path} --tool GalaxyTrainMLModel &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
25 mv ${html_outfile.files_path}/index.html ${html_outfile} &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
26 mv ${html_outfile.files_path}/exported_models/*.zip ${optimal_model} &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
27 mv ${html_outfile.files_path}/immuneML_output.zip $archive
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
28 ]]>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
29 </command>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
30 <inputs>
16
cd57c1c66f8b "planemo upload commit 60f280176ecdc2883fc7e85deb9aaa151f7c2088"
immuneml
parents: 15
diff changeset
31 <param name="iml_input" type="data" format="immuneml_receptors" label="immuneML dataset" optional="true" help="This field accepts an ImmuneML dataset, as created by the Create Dataset tool."/>
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
32 <param name="yaml_input" type="data" format="txt" label="YAML specification" multiple="false"/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
33 <param name="data_input" type="data" multiple="true" label="Additional files" optional="true" help="This field should include individual repertoire files, metadata files, receptor data and others."/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
34 </inputs>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
35 <outputs>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
36 <data format="zip" name="optimal_model" label="optimal_ml_settings.zip"/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
37 <data format="zip" name="archive" label="Archive: ML model training"/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
38 <data format="html" name="html_outfile" label="Summary: ML model training"/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
39 </outputs>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
40
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
41
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
42 <help>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
43 <![CDATA[
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
44
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
45 This tool can be used to run hyperparameter optimization over several different ML settings,
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
46 which include ML models and their parameters, encodings and preprocessing steps. Nested cross-validation is used to identify the optimal combination of ML settings.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
47
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
48 This is a YAML-based Galaxy tool. If you prefer a button-based interface that assumes less ML knowledge,
7
45ca02982e1f "planemo upload commit 8aef44a2b3bc8fc00a1efe0ce7ecab83eded053f-dirty"
immuneml
parents: 6
diff changeset
49 see `Train immune receptor classifiers (easy interface) <root?tool_id=immuneml_train_classifiers>`_ and
45ca02982e1f "planemo upload commit 8aef44a2b3bc8fc00a1efe0ce7ecab83eded053f-dirty"
immuneml
parents: 6
diff changeset
50 `Train immune repertoire classifiers (easy interface) <root?tool_id=novice_immuneml_interface>`_.
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
51
6
2d3dd9ff7e84 "planemo upload commit 74f2bd15d2b7723c8e5a22d743913706dc7d8333-dirty"
immuneml
parents: 3
diff changeset
52 For more details on how to train ML models in Galaxy, see `the documentation <https://docs.immuneml.uio.no/latest/galaxy/galaxy_train_ml_models.html>`_.
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
53
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
54 **Tool output**
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
55
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
56 This Galaxy tool will produce the following history elements:
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
57
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
58 - Summary: ML model training: an HTML page that allows you to browse through all results, including prediction accuracies on
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
59 the various data splits and report results.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
60
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
61 - Archive: ML model training: a .zip file containing the complete output folder as it was produced by immuneML. This folder
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
62 contains the output of the TrainMLModel instruction including all trained models and their predictions, and report results.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
63 Furthermore, the folder contains the complete YAML specification file for the immuneML run, the HTML output and a log file.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
64
19
051d349fdc8c "planemo upload commit 5ffe9db26c26d30c923c812b69346d95948e9cd0"
immuneml
parents: 16
diff changeset
65 - optimal_ml_settings.zip: a .zip file containing the raw files for the optimal trained ML settings (ML model, encoding). This .zip file can subsequently be used as an input when applying previously trained ML models to a new dataset. Currently, this can only be done locally using the command-line interface.
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
66
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
67 ]]>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
68
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
69 </help>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
70
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
71 </tool>